[ { "title": "$\\ell_1$-regression with Heavy-tailed Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11127", "id": "11127", "author_site": "Lijun Zhang, Zhi-Hua Zhou", "author": "Lijun Zhang; Zhi-Hua Zhou", "abstract": "In this paper, we consider the problem of linear regression with heavy-tailed distributions. Different from previous studies that use the squared loss to measure the performance, we choose the absolute loss, which is capable of estimating the conditional median. To address the challenge that both the input and output could be heavy-tailed, we propose a truncated minimization problem, and demonstrate that it enjoys an $O(\\sqrt{d/n})$ excess risk, where $d$ is the dimensionality and $n$ is the number of samples. Compared with traditional work on $\\ell_1$-regression, the main advantage of our result is that we achieve a high-probability risk bound without exponential moment conditions on the input and output. Furthermore, if the input is bounded, we show that the classical empirical risk minimization is competent for $\\ell_1$-regression even when the output is heavy-tailed.", "bibtex": "@inproceedings{NEURIPS2018_8b16ebc0,\n author = {Zhang, Lijun and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {\\textbackslash ell\\_1-regression with Heavy-tailed Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8b16ebc056e613024c057be590b542eb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8b16ebc056e613024c057be590b542eb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8b16ebc056e613024c057be590b542eb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8b16ebc056e613024c057be590b542eb-Reviews.html", "metareview": "", "pdf_size": 358099, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17689350048145002265&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8b16ebc056e613024c057be590b542eb-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology", "aff_unique_url": "http://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Nanjing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "(Probably) Concave Graph Matching", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11065", "id": "11065", "author_site": "Haggai Maron, Yaron Lipman", "author": "Haggai Maron; Yaron Lipman", "abstract": "In this paper we address the graph matching problem. Following the recent works of \\cite{zaslavskiy2009path,Vestner2017} we analyze and generalize the idea of concave relaxations. 
We introduce the concepts of \\emph{conditionally concave} and \\emph{probably conditionally concave} energies on polytopes and show that they encapsulate many instances of the graph matching problem, including matching Euclidean graphs and graphs on surfaces. We further prove that local minima of probably conditionally concave energies on general matching polytopes (e.g., doubly stochastic) are with high probability extreme points of the matching polytope (e.g., permutations).", "bibtex": "@inproceedings{NEURIPS2018_eda80a3d,\n author = {Maron, Haggai and Lipman, Yaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {(Probably) Concave Graph Matching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/eda80a3d5b344bc40f3bc04f65b7a357-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/eda80a3d5b344bc40f3bc04f65b7a357-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/eda80a3d5b344bc40f3bc04f65b7a357-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/eda80a3d5b344bc40f3bc04f65b7a357-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/eda80a3d5b344bc40f3bc04f65b7a357-Reviews.html", "metareview": "", "pdf_size": 2689598, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12811719695744123806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Weizmann Institute of Science; Weizmann Institute of Science", "aff_domain": "weizmann.ac.il;weizmann.ac.il", "email": "weizmann.ac.il;weizmann.ac.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/eda80a3d5b344bc40f3bc04f65b7a357-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "3D Steerable CNNs: Learning Rotationally Equivariant Features in Volumetric Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11982", "id": "11982", "author_site": "Maurice Weiler, Wouter Boomsma, Mario Geiger, Max Welling, Taco Cohen", "author": "Maurice Weiler; Mario Geiger; Max Welling; Wouter Boomsma; Taco S Cohen", "abstract": "We present a convolutional network that is equivariant to rigid body motions. The model uses scalar-, vector-, and tensor fields over 3D Euclidean space to represent data, and equivariant convolutions to map between such representations. These SE(3)-equivariant convolutions utilize kernels which are parameterized as a linear combination of a complete steerable kernel basis, which is derived analytically in this paper. We prove that equivariant convolutions are the most general equivariant linear maps between fields over R^3. 
Our experimental results confirm the effectiveness of 3D Steerable CNNs for the problem of amino acid propensity prediction and protein structure classification, both of which have inherent SE(3) symmetry.", "bibtex": "@inproceedings{NEURIPS2018_488e4104,\n author = {Weiler, Maurice and Geiger, Mario and Welling, Max and Boomsma, Wouter and Cohen, Taco S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {3D Steerable CNNs: Learning Rotationally Equivariant Features in Volumetric Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/488e4104520c6aab692863cc1dba45af-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/488e4104520c6aab692863cc1dba45af-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/488e4104520c6aab692863cc1dba45af-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/488e4104520c6aab692863cc1dba45af-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/488e4104520c6aab692863cc1dba45af-Reviews.html", "metareview": "", "pdf_size": 975332, "gs_citation": 618, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10898598436815000986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University of Amsterdam; EPFL; University of Amsterdam + CIFAR + Qualcomm AI Research; University of Copenhagen; Qualcomm AI Research", "aff_domain": "uva.nl;epfl.ch;uva.nl;di.ku.dk;gmail.com", "email": "uva.nl;epfl.ch;uva.nl;di.ku.dk;gmail.com", "github": "https://github.com/mariogeiger/se3cnn", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/488e4104520c6aab692863cc1dba45af-Abstract.html", "aff_unique_index": "0;1;0+2+3;4;3", "aff_unique_norm": "University of Amsterdam;EPFL;Canadian Institute for Advanced Research;Qualcomm;University of Copenhagen", "aff_unique_dep": ";;;Qualcomm AI Research;", "aff_unique_url": "https://www.uva.nl;https://www.epfl.ch;https://www.cifar.ca;https://www.qualcomm.com/research;https://www.ku.dk", "aff_unique_abbr": "UvA;EPFL;CIFAR;QAI;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0+2+3;4;3", "aff_country_unique": "Netherlands;Switzerland;Canada;United States;Denmark" }, { "title": "3D-Aware Scene Manipulation via Inverse Graphics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11201", "id": "11201", "author_site": "Shunyu Yao, Tzu Ming Hsu, Jun-Yan Zhu, Jiajun Wu, Antonio Torralba, Bill Freeman, Josh Tenenbaum", "author": "Shunyu Yao; Tzu Ming Hsu; Jun-Yan Zhu; Jiajun Wu; Antonio Torralba; Bill Freeman; Josh Tenenbaum", "abstract": "We aim to obtain an interpretable, expressive, and disentangled scene representation that contains comprehensive structural and textural information for each object. Previous scene representations learned by neural networks are often uninterpretable, limited to a single object, or lacking 3D knowledge. In this work, we propose 3D scene de-rendering networks (3D-SDN) to address the above issues by integrating disentangled representations for semantics, geometry, and appearance into a deep generative model. Our scene encoder performs inverse graphics, translating a scene into a structured object-wise representation. 
Our decoder has two components: a differentiable shape renderer and a neural texture generator. The disentanglement of semantics, geometry, and appearance supports 3D-aware scene manipulation, e.g., rotating and moving objects freely while keeping shape and texture consistent, and changing an object's appearance without affecting its shape. Experiments demonstrate that our editing scheme based on 3D-SDN is superior to its 2D counterpart.", "bibtex": "@inproceedings{NEURIPS2018_64223ccf,\n author = {Yao, Shunyu and Hsu, Tzu Ming and Zhu, Jun-Yan and Wu, Jiajun and Torralba, Antonio and Freeman, Bill and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {3D-Aware Scene Manipulation via Inverse Graphics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/64223ccf70bbb65a3a4aceac37e21016-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/64223ccf70bbb65a3a4aceac37e21016-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/64223ccf70bbb65a3a4aceac37e21016-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/64223ccf70bbb65a3a4aceac37e21016-Reviews.html", "metareview": "", "pdf_size": 4949539, "gs_citation": 318, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1601238761105816866&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "IIIS, Tsinghua University+MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL, Google Research; MIT CSAIL", "aff_domain": "tsinghua.edu.cn;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "email": "tsinghua.edu.cn;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/64223ccf70bbb65a3a4aceac37e21016-Abstract.html", "aff_unique_index": "0+1;1;1;1;1;1;1", "aff_unique_norm": "Tsinghua University;Massachusetts Institute of Technology", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.csail.mit.edu", "aff_unique_abbr": "THU;MIT CSAIL", "aff_campus_unique_index": "1;1;1;1;1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0+1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "title": "A Bandit Approach to Sequential Experimental Design with False Discovery Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11366", "id": "11366", "author_site": "Kevin Jamieson, Lalit Jain", "author": "Kevin G. Jamieson; Lalit Jain", "abstract": "We propose a new adaptive sampling approach to multiple testing which aims to maximize statistical power while ensuring anytime false discovery control. We consider $n$ distributions whose means are partitioned by whether they are below or equal to a baseline (nulls), versus above the baseline (true positives). In addition, each distribution can be sequentially and repeatedly sampled. Using techniques from multi-armed bandits, we provide an algorithm that takes as few samples as possible to exceed a target true positive proportion (i.e. 
proportion of true positives discovered) while giving anytime control of the false discovery proportion (nulls predicted as true positives). Our sample complexity results match known information theoretic lower bounds and through simulations we show a substantial performance improvement over uniform sampling and an adaptive elimination style algorithm. Given the simplicity of the approach, and its sample efficiency, the method has promise for wide adoption in the biological sciences, clinical testing for drug discovery, and maximization of click through in A/B/n testing problems.", "bibtex": "@inproceedings{NEURIPS2018_82cadb06,\n author = {Jamieson, Kevin G and Jain, Lalit},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bandit Approach to Sequential Experimental Design with False Discovery Control},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/82cadb0649a3af4968404c9f6031b233-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/82cadb0649a3af4968404c9f6031b233-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/82cadb0649a3af4968404c9f6031b233-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/82cadb0649a3af4968404c9f6031b233-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/82cadb0649a3af4968404c9f6031b233-Reviews.html", "metareview": "", "pdf_size": 755505, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7974128352790450011&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff": "Paul G. Allen School of Computer Science & Engineering, University of Washington, Seattle, WA + Optimizely, San Francisco, CA; Paul G. Allen School of Computer Science & Engineering, University of Washington, Seattle, WA", "aff_domain": "cs.washington.edu;cs.washington.edu", "email": "cs.washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/82cadb0649a3af4968404c9f6031b233-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "University of Washington;Optimizely", "aff_unique_dep": "Paul G. Allen School of Computer Science & Engineering;", "aff_unique_url": "https://www.washington.edu;https://www.optimizely.com", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "0+1;0", "aff_campus_unique": "Seattle;San Francisco", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "A Bayes-Sard Cubature Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11572", "id": "11572", "author_site": "Toni Karvonen, Chris Oates, Simo Sarkka", "author": "Toni Karvonen; Chris J Oates; Simo Sarkka", "abstract": "This paper focusses on the formulation of numerical integration as an inferential task. To date, research effort has largely focussed on the development of Bayesian cubature, whose distributional output provides uncertainty quantification for the integral. However, the point estimators associated to Bayesian cubature can be inaccurate and acutely sensitive to the prior when the domain is high-dimensional. 
To address these drawbacks we introduce Bayes-Sard cubature, a probabilistic framework that combines the flexibility of Bayesian cubature with the robustness of classical cubatures which are well-established. This is achieved by considering a Gaussian process model for the integrand whose mean is a parametric regression model, with an improper prior on each regression coefficient. The features in the regression model consist of test functions which are guaranteed to be exactly integrated, with remaining degrees of freedom afforded to the non-parametric part. The asymptotic convergence of the Bayes-Sard cubature method is established and the theoretical results are numerically verified. In particular, we report two orders of magnitude reduction in error compared to Bayesian cubature in the context of a high-dimensional financial integral.", "bibtex": "@inproceedings{NEURIPS2018_6775a063,\n author = {Karvonen, Toni and Oates, Chris J and Sarkka, Simo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bayes-Sard Cubature Method},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6775a0635c302542da2c32aa19d86be0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6775a0635c302542da2c32aa19d86be0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6775a0635c302542da2c32aa19d86be0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6775a0635c302542da2c32aa19d86be0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6775a0635c302542da2c32aa19d86be0-Reviews.html", "metareview": "", "pdf_size": 503485, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14199713727894479287&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 19, "aff": "Aalto University, Finland; Newcastle University, UK + Alan Turing Institute, UK; Aalto University, Finland", "aff_domain": "aalto.fi;ncl.ac.uk;aalto.fi", "email": "aalto.fi;ncl.ac.uk;aalto.fi", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6775a0635c302542da2c32aa19d86be0-Abstract.html", "aff_unique_index": "0;1+2;0", "aff_unique_norm": "Aalto University;Newcastle University;Alan Turing Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aalto.fi;https://www.ncl.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Aalto;NU;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+1;0", "aff_country_unique": "Finland;United Kingdom" }, { "title": "A Bayesian Approach to Generative Adversarial Imitation Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11715", "id": "11715", "author_site": "Wonseok Jeon, Seokin Seo, Kee-Eung Kim", "author": "Wonseok Jeon; Seokin Seo; Kee-Eung Kim", "abstract": "Generative adversarial training for imitation learning has shown promising results on high-dimensional and continuous control tasks. This paradigm is based on reducing the imitation learning problem to the density matching problem, where the agent iteratively refines the policy to match the empirical state-action visitation frequency of the expert demonstration. 
Although this approach has been shown to robustly learn to imitate even from scarce demonstrations, one must still address the inherent challenge that collecting trajectory samples in each iteration is a costly operation. To address this issue, we first propose a Bayesian formulation of generative adversarial imitation learning (GAIL), where the imitation policy and the cost function are represented as stochastic neural networks. Then, we show that we can significantly enhance the sample efficiency of GAIL by leveraging the predictive density of the cost, on an extensive set of imitation learning tasks with high-dimensional states and actions.", "bibtex": "@inproceedings{NEURIPS2018_943aa0fc,\n author = {Jeon, Wonseok and Seo, Seokin and Kim, Kee-Eung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bayesian Approach to Generative Adversarial Imitation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/943aa0fcda4ee2901a7de9321663b114-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/943aa0fcda4ee2901a7de9321663b114-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/943aa0fcda4ee2901a7de9321663b114-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/943aa0fcda4ee2901a7de9321663b114-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/943aa0fcda4ee2901a7de9321663b114-Reviews.html", "metareview": "", "pdf_size": 524859, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10235948747774571397&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "School of Computing, KAIST, Republic of Korea; School of Computing, KAIST, Republic of Korea; School of Computing, KAIST, Republic of Korea+PROWLER.io", "aff_domain": "ai.kaist.ac.kr;ai.kaist.ac.kr;cs.kaist.ac.kr", "email": "ai.kaist.ac.kr;ai.kaist.ac.kr;cs.kaist.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/943aa0fcda4ee2901a7de9321663b114-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "KAIST;PROWLER.io", "aff_unique_dep": "School of Computing;", "aff_unique_url": "https://www.kaist.ac.kr;https://prowler.io", "aff_unique_abbr": "KAIST;PROWLER.io", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1", "aff_country_unique": "South Korea;United Kingdom" }, { "title": "A Bayesian Nonparametric View on Count-Min Sketch", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11837", "id": "11837", "author_site": "Diana Cai, Michael Mitzenmacher, Ryan Adams", "author": "Diana Cai; Michael Mitzenmacher; Ryan P. Adams", "abstract": "The count-min sketch is a time- and memory-efficient randomized data structure that provides a point estimate of the number of times an item has appeared in a data stream. The count-min sketch and related hash-based data structures are ubiquitous in systems that must track frequencies of data such as URLs, IP addresses, and language n-grams. We present a Bayesian view on the count-min sketch, using the same data structure, but providing a posterior distribution over the frequencies that characterizes the uncertainty arising from the hash-based approximation. 
In particular, we take a nonparametric approach and consider tokens generated from a Dirichlet process (DP) random measure, which allows for an unbounded number of unique tokens. Using properties of the DP, we show that it is possible to straightforwardly compute posterior marginals of the unknown true counts and that the modes of these marginals recover the count-min sketch estimator, inheriting the associated probabilistic guarantees. Using simulated data with known ground truth, we investigate the properties of these estimators. Lastly, we also study a modified problem in which the observation stream consists of collections of tokens (i.e., documents) arising from a random measure drawn from a stable beta process, which allows for power law scaling behavior in the number of unique tokens.", "bibtex": "@inproceedings{NEURIPS2018_0b9e57c4,\n author = {Cai, Diana and Mitzenmacher, Michael and Adams, Ryan P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bayesian Nonparametric View on Count-Min Sketch},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0b9e57c46de934cee33b0e8d1839bfc2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0b9e57c46de934cee33b0e8d1839bfc2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0b9e57c46de934cee33b0e8d1839bfc2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0b9e57c46de934cee33b0e8d1839bfc2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0b9e57c46de934cee33b0e8d1839bfc2-Reviews.html", "metareview": "", "pdf_size": 2742298, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2317502123539533443&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Princeton University; Harvard University; Princeton University", "aff_domain": "cs.princeton.edu;eecs.harvard.edu;princeton.edu", "email": "cs.princeton.edu;eecs.harvard.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0b9e57c46de934cee33b0e8d1839bfc2-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Princeton University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.harvard.edu", "aff_unique_abbr": "Princeton;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Block Coordinate Ascent Algorithm for Mean-Variance Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11126", "id": "11126", "author_site": "Tengyang Xie, Bo Liu, Yangyang Xu, Mohammad Ghavamzadeh, Yinlam Chow, Daoming Lyu, Daesub Yoon", "author": "Tengyang Xie; Bo Liu; Yangyang Xu; Mohammad Ghavamzadeh; Yinlam Chow; Daoming Lyu; Daesub Yoon", "abstract": "Risk management in dynamic decision problems is a primary concern in many fields, including financial investment, autonomous driving, and healthcare. The mean-variance function is one of the most widely used objective functions in risk management due to its simplicity and interpretability. 
Existing algorithms for mean-variance optimization are based on multi-time-scale stochastic approximation, whose learning rate schedules are often hard to tune and which have only asymptotic convergence proofs. In this paper, we develop a model-free policy search framework for mean-variance optimization with finite-sample error bound analysis (to local optima). Our starting point is a reformulation of the original mean-variance function with its Fenchel dual, from which we propose a stochastic block coordinate ascent policy search algorithm. Both the asymptotic convergence guarantee of the last iteration's solution and the convergence rate of the randomly picked solution are provided, and their applicability is demonstrated on several benchmark domains.", "bibtex": "@inproceedings{NEURIPS2018_4e4b5fbb,\n author = {Xie, Tengyang and Liu, Bo and Xu, Yangyang and Ghavamzadeh, Mohammad and Chow, Yinlam and Lyu, Daoming and Yoon, Daesub},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Block Coordinate Ascent Algorithm for Mean-Variance Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e4b5fbbbb602b6d35bea8460aa8f8e5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e4b5fbbbb602b6d35bea8460aa8f8e5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e4b5fbbbb602b6d35bea8460aa8f8e5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e4b5fbbbb602b6d35bea8460aa8f8e5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e4b5fbbbb602b6d35bea8460aa8f8e5-Reviews.html", "metareview": "", "pdf_size": 489600, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5015252603346788247&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "UMass Amherst; Auburn University; Rensselaer Polytechnic Institute; Facebook AI Research; Google DeepMind; Auburn University; ETRI", "aff_domain": "cs.umass.edu;auburn.edu;rpi.edu;fb.com;google.com;auburn.edu;etri.re.kr", "email": "cs.umass.edu;auburn.edu;rpi.edu;fb.com;google.com;auburn.edu;etri.re.kr", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e4b5fbbbb602b6d35bea8460aa8f8e5-Abstract.html", "aff_unique_index": "0;1;2;3;4;1;5", "aff_unique_norm": "University of Massachusetts Amherst;Auburn University;Rensselaer Polytechnic Institute;Meta;Google;Electronics and Telecommunications Research Institute", "aff_unique_dep": ";;;Facebook AI Research;Google DeepMind;", "aff_unique_url": "https://www.umass.edu;https://www.auburn.edu;https://www.rpi.edu;https://research.facebook.com;https://deepmind.com;https://www.etri.re.kr", "aff_unique_abbr": "UMass Amherst;Auburn;RPI;FAIR;DeepMind;ETRI", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;1;0;2", "aff_country_unique": "United States;United Kingdom;South Korea" }, { "title": "A Bridging Framework for Model Optimization and Deep Propagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11427", "id": "11427", "author_site": "Risheng Liu, Shichao Cheng, xiaokun liu, Long Ma, Xin Fan, Zhongxuan Luo", "author": "Risheng Liu; Shichao Cheng; xiaokun liu; Long Ma; Xin Fan; Zhongxuan Luo", "abstract": 
"Optimizing task-related mathematical model is one of the most fundamental methodologies in statistic and learning areas. However, generally designed schematic iterations may hard to investigate complex data distributions in real-world applications. Recently, training deep propagations (i.e., networks) has gained promising performance in some particular tasks. Unfortunately, existing networks are often built in heuristic manners, thus lack of principled interpretations and solid theoretical supports. In this work, we provide a new paradigm, named Propagation and Optimization based Deep Model (PODM), to bridge the gaps between these different mechanisms (i.e., model optimization and deep propagation). On the one hand, we utilize PODM as a deeply trained solver for model optimization. Different from these existing network based iterations, which often lack theoretical investigations, we provide strict convergence analysis for PODM in the challenging nonconvex and nonsmooth scenarios. On the other hand, by relaxing the model constraints and performing end-to-end training, we also develop a PODM based strategy to integrate domain knowledge (formulated as models) and real data distributions (learned by networks), resulting in a generic ensemble framework for challenging real-world applications. Extensive experiments verify our theoretical results and demonstrate the superiority of PODM against these state-of-the-art approaches.", "bibtex": "@inproceedings{NEURIPS2018_1a0a283b,\n author = {Liu, Risheng and Cheng, Shichao and liu, xiaokun and Ma, Long and Fan, Xin and Luo, Zhongxuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bridging Framework for Model Optimization and Deep Propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1a0a283bfe7c549dee6c638a05200e32-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1a0a283bfe7c549dee6c638a05200e32-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1a0a283bfe7c549dee6c638a05200e32-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1a0a283bfe7c549dee6c638a05200e32-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1a0a283bfe7c549dee6c638a05200e32-Reviews.html", "metareview": "", "pdf_size": 7939939, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7352311289648497590&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff": "International School of Information Science & Engineering, Dalian University of Technology + Key Laboratory for Ubiquitous Network and Service Software of Liaoning Province; School of Mathematical Science, Dalian University of Technology; International School of Information Science & Engineering, Dalian University of Technology; International School of Information Science & Engineering, Dalian University of Technology; International School of Information Science & Engineering, Dalian University of Technology + Key Laboratory for Ubiquitous Network and Service Software of Liaoning Province; Key Laboratory for Ubiquitous Network and Service Software of Liaoning Province + School of Mathematical Science, Dalian University of Technology", "aff_domain": "dlut.edu.cn; ; ; ; ; ", "email": "dlut.edu.cn; ; ; ; ; ", "github": "", "project": "", 
"author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1a0a283bfe7c549dee6c638a05200e32-Abstract.html", "aff_unique_index": "0+1;0;0;0;0+1;1+0", "aff_unique_norm": "Dalian University of Technology;Liaoning Province Key Laboratory for Ubiquitous Network and Service Software", "aff_unique_dep": "International School of Information Science & Engineering;Key Laboratory for Ubiquitous Network and Service Software", "aff_unique_url": "http://en.dlut.edu.cn/;", "aff_unique_abbr": "DUT;", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Dalian;", "aff_country_unique_index": "0+0;0;0;0;0+0;0+0", "aff_country_unique": "China" }, { "title": "A Convex Duality Framework for GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11513", "id": "11513", "author_site": "Farzan Farnia, David Tse", "author": "Farzan Farnia; David Tse", "abstract": "Generative adversarial network (GAN) is a minimax game between a generator mimicking the true model and a discriminator distinguishing the samples produced by the generator from the real training samples. Given an unconstrained discriminator able to approximate any function, this game reduces to finding the generative model minimizing a divergence measure, e.g. the Jensen-Shannon (JS) divergence, to the data distribution. However, in practice the discriminator is constrained to be in a smaller class F such as neural nets. Then, a natural question is how the divergence minimization interpretation changes as we constrain F. In this work, we address this question by developing a convex duality framework for analyzing GANs. For a convex set F, this duality framework interprets the original GAN formulation as finding the generative model with minimum JS-divergence to the distributions penalized to match the moments of the data distribution, with the moments specified by the discriminators in F. We show that this interpretation more generally holds for f-GAN and Wasserstein GAN. As a byproduct, we apply the duality framework to a hybrid of f-divergence and Wasserstein distance. Unlike the f-divergence, we prove that the proposed hybrid divergence changes continuously with the generative model, which suggests regularizing the discriminator's Lipschitz constant in f-GAN and vanilla GAN. We numerically evaluate the power of the suggested regularization schemes for improving GAN's training performance.", "bibtex": "@inproceedings{NEURIPS2018_831caa1b,\n author = {Farnia, Farzan and Tse, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Convex Duality Framework for GANs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/831caa1b600f852b7844499430ecac17-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/831caa1b600f852b7844499430ecac17-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/831caa1b600f852b7844499430ecac17-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/831caa1b600f852b7844499430ecac17-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/831caa1b600f852b7844499430ecac17-Reviews.html", "metareview": "", "pdf_size": 2399476, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16417773428232646710&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Electrical Engineering, Stanford University; Department of Electrical Engineering, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/831caa1b600f852b7844499430ecac17-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Deep Bayesian Policy Reuse Approach Against Non-Stationary Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11116", "id": "11116", "author_site": "YAN ZHENG, Zhaopeng Meng, Jianye Hao, Zongzhang Zhang, Tianpei Yang, Changjie Fan", "author": "YAN ZHENG; Zhaopeng Meng; Jianye Hao; Zongzhang Zhang; Tianpei Yang; Changjie Fan", "abstract": "In multiagent domains, coping with non-stationary agents that change behaviors from time to time is a challenging problem, where an agent is usually required to be able to quickly detect the other agent's policy during online interaction, and then adapt its own policy accordingly. This paper studies efficient policy detecting and reusing techniques when playing against non-stationary agents in Markov games. We propose a new deep BPR+ algorithm by extending the recent BPR+ algorithm with a neural network as the value-function approximator. To detect policy accurately, we propose the \\textit{rectified belief model} taking advantage of the \\textit{opponent model} to infer the other agent's policy from reward signals and its behaviors. Instead of directly storing individual policies as BPR+, we introduce \\textit{distilled policy network} that serves as the policy library in BPR+, using policy distillation to achieve efficient online policy learning and reuse. Deep BPR+ inherits all the advantages of BPR+ and empirically shows better performance in terms of detection accuracy, cumulative rewards and speed of convergence compared to existing algorithms in complex Markov games with raw visual inputs.", "bibtex": "@inproceedings{NEURIPS2018_85422afb,\n author = {ZHENG, YAN and Meng, Zhaopeng and Hao, Jianye and Zhang, Zongzhang and Yang, Tianpei and Fan, Changjie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Deep Bayesian Policy Reuse Approach Against Non-Stationary Agents},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/85422afb467e9456013a2a51d4dff702-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/85422afb467e9456013a2a51d4dff702-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/85422afb467e9456013a2a51d4dff702-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/85422afb467e9456013a2a51d4dff702-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/85422afb467e9456013a2a51d4dff702-Reviews.html", "metareview": "", "pdf_size": 1230374, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9941583717238816282&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "College of Intelligence and Computing, Tianjin University, Tianjin, China; College of Intelligence and Computing, Tianjin University, Tianjin, China; College of Intelligence and Computing, Tianjin University, Tianjin, China; School of Computer Science and Technology, Soochow University, Suzhou, China; College of Intelligence and Computing, Tianjin University, Tianjin, China; NetEase Fuxi Lab, NetEase, Inc., Hangzhou, China", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;suda.edu.cn;tju.edu.cn;netease.com", "email": "tju.edu.cn;tju.edu.cn;tju.edu.cn;suda.edu.cn;tju.edu.cn;netease.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/85422afb467e9456013a2a51d4dff702-Abstract.html", "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Tianjin University;Soochow University;NetEase, Inc.", "aff_unique_dep": "College of Intelligence and Computing;School of Computer Science and Technology;NetEase Fuxi Lab", "aff_unique_url": "http://www.tju.edu.cn;http://www.soochow.edu.cn;https://www.163.com", "aff_unique_abbr": "Tianjin University;;NetEase", "aff_campus_unique_index": "0;0;0;1;0;2", "aff_campus_unique": "Tianjin;Suzhou;Hangzhou", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "A Dual Framework for Low-rank Tensor Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11536", "id": "11536", "author_site": "Madhav Nimishakavi, Pratik Kumar Jawanpuria, Bamdev Mishra", "author": "Madhav Nimishakavi; Pratik Kumar Jawanpuria; Bamdev Mishra", "abstract": "One of the popular approaches for low-rank tensor completion is to use the latent trace norm regularization. However, most existing works in this direction learn a sparse combination of tensors. In this work, we fill this gap by proposing a variant of the latent trace norm that helps in learning a non-sparse combination of tensors. We develop a dual framework for solving the low-rank tensor completion problem. We first show a novel characterization of the dual solution space with an interesting factorization of the optimal solution. Overall, the optimal solution is shown to lie on a Cartesian product of Riemannian manifolds. Furthermore, we exploit the versatile Riemannian optimization framework to propose a computationally efficient trust-region algorithm. 
The experiments illustrate the efficacy of the proposed algorithm on several real-world datasets across applications.", "bibtex": "@inproceedings{NEURIPS2018_09a5e2a1,\n author = {Nimishakavi, Madhav and Jawanpuria, Pratik Kumar and Mishra, Bamdev},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Dual Framework for Low-rank Tensor Completion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/09a5e2a11bea20817477e0b1dfe2cc21-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/09a5e2a11bea20817477e0b1dfe2cc21-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/09a5e2a11bea20817477e0b1dfe2cc21-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/09a5e2a11bea20817477e0b1dfe2cc21-Reviews.html", "metareview": "", "pdf_size": 608386, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2085800808347416785&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Indian Institute of Science, India; Microsoft, India; Microsoft, India", "aff_domain": "iisc.ac.in;microsoft.com;microsoft.com", "email": "iisc.ac.in;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/09a5e2a11bea20817477e0b1dfe2cc21-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Indian Institute of Science;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.iisc.ac.in;https://www.microsoft.com/en-in", "aff_unique_abbr": "IISc;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "A Game-Theoretic Approach to Recommendation Systems with Strategic Content Providers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11130", "id": "11130", "author_site": "Omer Ben-Porat, Moshe Tennenholtz", "author": "Omer Ben-Porat; Moshe Tennenholtz", "abstract": "We introduce a game-theoretic approach to the study of recommendation systems with strategic content providers. Such systems should be fair and stable. Showing that traditional approaches fail to satisfy these requirements, we propose the Shapley mediator. We show that the Shapley mediator satisfies the fairness and stability requirements, runs in linear time, and is the only economically efficient mechanism satisfying these properties.", "bibtex": "@inproceedings{NEURIPS2018_a9a1d531,\n author = {Ben-Porat, Omer and Tennenholtz, Moshe},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Game-Theoretic Approach to Recommendation Systems with Strategic Content Providers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a9a1d5317a33ae8cef33961c34144f84-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a9a1d5317a33ae8cef33961c34144f84-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a9a1d5317a33ae8cef33961c34144f84-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a9a1d5317a33ae8cef33961c34144f84-Reviews.html", "metareview": "", "pdf_size": 355532, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14418362814228736876&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a9a1d5317a33ae8cef33961c34144f84-Abstract.html" }, { "title": "A General Method for Amortizing Variational Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11754", "id": "11754", "author_site": "Joseph Marino, Milan Cvitkovic, Yisong Yue", "author": "Joseph Marino; Milan Cvitkovic; Yisong Yue", "abstract": "We introduce the variational filtering EM algorithm, a simple, general-purpose method for performing variational inference in dynamical latent variable models using information from only past and present variables, i.e. filtering. The algorithm is derived from the variational objective in the filtering setting and consists of an optimization procedure at each time step. By performing each inference optimization procedure with an iterative amortized inference model, we obtain a computationally efficient implementation of the algorithm, which we call amortized variational filtering. We present experiments demonstrating that this general-purpose method improves inference performance across several recent deep dynamical latent variable models.", "bibtex": "@inproceedings{NEURIPS2018_060afc8a,\n author = {Marino, Joseph and Cvitkovic, Milan and Yue, Yisong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A General Method for Amortizing Variational Filtering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/060afc8a563aaccd288f98b7c8723b61-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/060afc8a563aaccd288f98b7c8723b61-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/060afc8a563aaccd288f98b7c8723b61-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/060afc8a563aaccd288f98b7c8723b61-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/060afc8a563aaccd288f98b7c8723b61-Reviews.html", "metareview": "", "pdf_size": 842107, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11262711494393358792&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "California Institute of Technology; California Institute of Technology; California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu;caltech.edu", "email": "caltech.edu;caltech.edu;caltech.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/060afc8a563aaccd288f98b7c8723b61-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Likelihood-Free Inference Framework for Population Genetic Data using Exchangeable Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11821", "id": "11821", "author_site": "Jeffrey Chan, Valerio Perrone, Jeffrey Spence, Paul Jenkins, Sara Mathieson, Yun Song", "author": "Jeffrey Chan; Valerio Perrone; Jeffrey Spence; Paul Jenkins; Sara Mathieson; Yun Song", "abstract": "An explosion of high-throughput DNA sequencing in the past decade has led to a surge of interest in population-scale inference with whole-genome data. Recent work in population genetics has centered on designing inference methods for relatively simple model classes, and few scalable general-purpose inference techniques exist for more realistic, complex models. To achieve this, two inferential challenges need to be addressed: (1) population data are exchangeable, calling for methods that efficiently exploit the symmetries of the data, and (2) computing likelihoods is intractable as it requires integrating over a set of correlated, extremely high-dimensional latent variables. These challenges are traditionally tackled by likelihood-free methods that use scientific simulators to generate datasets and reduce them to hand-designed, permutation-invariant summary statistics, often leading to inaccurate inference. In this work, we develop an exchangeable neural network that performs summary statistic-free, likelihood-free inference. Our framework can be applied in a black-box fashion across a variety of simulation-based tasks, both within and outside biology. 
We demonstrate the power of our approach on the recombination hotspot testing problem, outperforming the state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_2e9f978b,\n author = {Chan, Jeffrey and Perrone, Valerio and Spence, Jeffrey and Jenkins, Paul and Mathieson, Sara and Song, Yun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Likelihood-Free Inference Framework for Population Genetic Data using Exchangeable Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2e9f978b222a956ba6bdf427efbd9ab3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2e9f978b222a956ba6bdf427efbd9ab3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2e9f978b222a956ba6bdf427efbd9ab3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2e9f978b222a956ba6bdf427efbd9ab3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2e9f978b222a956ba6bdf427efbd9ab3-Reviews.html", "metareview": "", "pdf_size": 855721, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5564729426311739157&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "University of California, Berkeley; University of Warwick; University of California, Berkeley; University of Warwick; Swarthmore College; University of California, Berkeley", "aff_domain": "berkeley.edu;warwick.ac.uk;berkeley.edu;warwick.ac.uk;swarthmore.edu;berkeley.edu", "email": "berkeley.edu;warwick.ac.uk;berkeley.edu;warwick.ac.uk;swarthmore.edu;berkeley.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2e9f978b222a956ba6bdf427efbd9ab3-Abstract.html", "aff_unique_index": "0;1;0;1;2;0", "aff_unique_norm": "University of California, Berkeley;University of Warwick;Swarthmore College", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.warwick.ac.uk;https://www.swarthmore.edu", "aff_unique_abbr": "UC Berkeley;Warwick;Swarthmore", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "A Linear Speedup Analysis of Distributed Deep Learning with Sparse and Quantized Communication", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11261", "id": "11261", "author_site": "Peng Jiang, Gagan Agrawal", "author": "Peng Jiang; Gagan Agrawal", "abstract": "The large communication overhead has imposed a bottleneck on the performance of distributed Stochastic Gradient Descent (SGD) for training deep neural networks. Previous works have demonstrated the potential of using gradient sparsification and quantization to reduce the communication cost. However, there is still a lack of understanding about how sparse and quantized communication affects the convergence rate of the training algorithm. In this paper, we study the convergence rate of distributed SGD for non-convex optimization with two communication reducing strategies: sparse parameter averaging and gradient quantization. We show that $O(1/\\sqrt{MK})$ convergence rate can be achieved if the sparsification and quantization hyperparameters are configured properly. 
We also propose a strategy called periodic quantized averaging (PQASGD) that further reduces the communication cost while preserving the $O(1/\\sqrt{MK})$ convergence rate. Our evaluation validates our theoretical results and shows that our PQASGD can converge as fast as full-communication SGD with only $3\\%-5\\%$ of the communication data size.", "bibtex": "@inproceedings{NEURIPS2018_17326d10,\n author = {Jiang, Peng and Agrawal, Gagan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Linear Speedup Analysis of Distributed Deep Learning with Sparse and Quantized Communication},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/17326d10d511828f6b34fa6d751739e2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/17326d10d511828f6b34fa6d751739e2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/17326d10d511828f6b34fa6d751739e2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/17326d10d511828f6b34fa6d751739e2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/17326d10d511828f6b34fa6d751739e2-Reviews.html", "metareview": "", "pdf_size": 1063407, "gs_citation": 251, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16839781962576097385&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "The Ohio State University; The Ohio State University", "aff_domain": "osu.edu;cse.ohio-state.edu", "email": "osu.edu;cse.ohio-state.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/17326d10d511828f6b34fa6d751739e2-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Lyapunov-based Approach to Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11775", "id": "11775", "author_site": "Yinlam Chow, Ofir Nachum, Edgar Duenez-Guzman, Mohammad Ghavamzadeh", "author": "Yinlam Chow; Ofir Nachum; Edgar Duenez-Guzman; Mohammad Ghavamzadeh", "abstract": "In many real-world reinforcement learning (RL) problems, besides optimizing the main objective function, an agent must concurrently avoid violating a number of constraints. In particular, besides optimizing performance, it is crucial to guarantee the safety of an agent during training as well as deployment (e.g., a robot should avoid taking actions - exploratory or not - which irrevocably harm its hardware). To incorporate safety in RL, we derive algorithms under the framework of constrained Markov decision processes (CMDPs), an extension of the standard Markov decision processes (MDPs) augmented with constraints on expected cumulative costs. Our approach hinges on a novel Lyapunov method. We define and present a method for constructing Lyapunov functions, which provide an effective way to guarantee the global safety of a behavior policy during training via a set of local linear constraints. 
Leveraging these theoretical underpinnings, we show how to use the Lyapunov approach to systematically transform dynamic programming (DP) and RL algorithms into their safe counterparts. To illustrate their effectiveness, we evaluate these algorithms in several CMDP planning and decision-making tasks on a safety benchmark domain. Our results show that our proposed method significantly outperforms existing baselines in balancing constraint satisfaction and performance.", "bibtex": "@inproceedings{NEURIPS2018_4fe51490,\n author = {Chow, Yinlam and Nachum, Ofir and Duenez-Guzman, Edgar and Ghavamzadeh, Mohammad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Lyapunov-based Approach to Safe Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4fe5149039b52765bde64beb9f674940-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4fe5149039b52765bde64beb9f674940-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4fe5149039b52765bde64beb9f674940-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4fe5149039b52765bde64beb9f674940-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4fe5149039b52765bde64beb9f674940-Reviews.html", "metareview": "", "pdf_size": 744387, "gs_citation": 664, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11292083648863613816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "DeepMind; Google Brain; DeepMind; Facebook AI Research", "aff_domain": "google.com;google.com;google.com;fb.com", "email": "google.com;google.com;google.com;fb.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4fe5149039b52765bde64beb9f674940-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "DeepMind;Google;Meta", "aff_unique_dep": ";Google Brain;Facebook AI Research", "aff_unique_url": "https://deepmind.com;https://brain.google.com;https://research.facebook.com", "aff_unique_abbr": "DeepMind;Google Brain;FAIR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "A Mathematical Model For Optimal Decisions In A Representative Democracy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11462", "id": "11462", "author_site": "Malik Magdon-Ismail, Lirong Xia", "author": "Malik Magdon-Ismail; Lirong Xia", "abstract": "Direct democracy, where each voter casts one vote, fails when the average voter competence falls below 50%. This happens in noisy settings when voters have limited information. Representative democracy, where voters choose representatives to vote, can be an elixir in both these situations. We introduce a mathematical model for studying representative democracy, in particular understanding the parameters of a representative democracy that gives maximum decision making capability. Our main result states that under general and natural conditions,", "bibtex": "@inproceedings{NEURIPS2018_fa2431bf,\n author = {Magdon-Ismail, Malik and Xia, Lirong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Mathematical Model For Optimal Decisions In A Representative Democracy},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fa2431bf9d65058fe34e9713e32d60e6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fa2431bf9d65058fe34e9713e32d60e6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fa2431bf9d65058fe34e9713e32d60e6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fa2431bf9d65058fe34e9713e32d60e6-Reviews.html", "metareview": "", "pdf_size": 319148, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15149791939003951522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY 12180; Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY 12180", "aff_domain": "cs.rpi.edu;cs.rpi.edu", "email": "cs.rpi.edu;cs.rpi.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fa2431bf9d65058fe34e9713e32d60e6-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Troy", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Model for Learned Bloom Filters and Optimizing by Sandwiching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11070", "id": "11070", "author": "Michael Mitzenmacher", "abstract": "Recent work has suggested enhancing Bloom filters by using a pre-filter, based on applying machine learning to determine a function that models the data set the Bloom filter is meant to represent. Here we model such learned Bloom filters, with the following outcomes: (1) we clarify what guarantees can and cannot be associated with such a structure; (2) we show how to estimate what size the learning function must achieve in order to obtain improved performance; (3) we provide a simple method, sandwiching, for optimizing learned Bloom filters; and (4) we propose a design and analysis approach for a learned Bloomier filter, based on our modeling approach.", "bibtex": "@inproceedings{NEURIPS2018_0f49c89d,\n author = {Mitzenmacher, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Model for Learned Bloom Filters and Optimizing by Sandwiching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0f49c89d1e7298bb9930789c8ed59d48-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0f49c89d1e7298bb9930789c8ed59d48-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0f49c89d1e7298bb9930789c8ed59d48-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0f49c89d1e7298bb9930789c8ed59d48-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0f49c89d1e7298bb9930789c8ed59d48-Reviews.html", "metareview": "", "pdf_size": 355893, "gs_citation": 236, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2619667217536880866&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "School of Engineering and Applied Sciences, Harvard University", "aff_domain": "eecs.harvard.edu", "email": "eecs.harvard.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0f49c89d1e7298bb9930789c8ed59d48-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Sciences", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "A Neural Compositional Paradigm for Image Captioning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11088", "id": "11088", "author_site": "Bo Dai, Sanja Fidler, Dahua Lin", "author": "Bo Dai; Sanja Fidler; Dahua Lin", "abstract": "Mainstream captioning models often follow a sequential structure to generate captions, leading to issues such as introduction of irrelevant semantics, lack of diversity in the generated captions, and inadequate generalization performance. In this paper, we present an alternative paradigm for image captioning, which factorizes the captioning procedure into two stages: (1) extracting an explicit semantic representation from the given image; and (2) constructing the caption based on a recursive compositional procedure in a bottom-up manner. Compared to conventional ones, our paradigm better preserves the semantic content through an explicit factorization of semantics and syntax. By using the compositional generation procedure, caption construction follows a recursive structure, which naturally fits the properties of human language. Moreover, the proposed compositional procedure requires less data to train, generalizes better, and yields more diverse captions.", "bibtex": "@inproceedings{NEURIPS2018_8bf1211f,\n author = {Dai, Bo and Fidler, Sanja and Lin, Dahua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Neural Compositional Paradigm for Image Captioning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8bf1211fd4b7b94528899de0a43b9fb3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8bf1211fd4b7b94528899de0a43b9fb3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8bf1211fd4b7b94528899de0a43b9fb3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8bf1211fd4b7b94528899de0a43b9fb3-Reviews.html", "metareview": "", "pdf_size": 1557511, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5204622663705352577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "CUHK-SenseTime Joint Lab, The Chinese University of Hong Kong; University of Toronto+Vector Institute+NVIDIA; CUHK-SenseTime Joint Lab, The Chinese University of Hong Kong", "aff_domain": "ie.cuhk.edu.hk;cs.toronto.edu;ie.cuhk.edu.hk", "email": "ie.cuhk.edu.hk;cs.toronto.edu;ie.cuhk.edu.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8bf1211fd4b7b94528899de0a43b9fb3-Abstract.html", "aff_unique_index": "0;1+2+3;0", "aff_unique_norm": "Chinese University of Hong Kong;University of Toronto;Vector Institute;NVIDIA", "aff_unique_dep": "CUHK-SenseTime Joint Lab;;;NVIDIA Corporation", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.nvidia.com", "aff_unique_abbr": "CUHK;U of T;Vector Institute;NVIDIA", "aff_campus_unique_index": "0;;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1+1+2;0", "aff_country_unique": "China;Canada;United States" }, { "title": "A Practical Algorithm for Distributed Clustering and Outlier Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11235", "id": "11235", "author_site": "Jiecao Chen, Erfan Sadeqi Azer, Qin Zhang", "author": "Jiecao Chen; Erfan Sadeqi Azer; Qin Zhang", "abstract": "We study the classic k-means/median clustering, which are fundamental problems in unsupervised learning, in the setting where data are partitioned across multiple sites, and where we are allowed to discard a small portion of the data by labeling them as outliers. We propose a simple approach based on constructing small summary for the original dataset. The proposed method is time and communication efficient, has good approximation guarantees, and can identify the global outliers effectively.", "bibtex": "@inproceedings{NEURIPS2018_f7f580e1,\n author = {Chen, Jiecao and Sadeqi Azer, Erfan and Zhang, Qin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Practical Algorithm for Distributed Clustering and Outlier Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f7f580e11d00a75814d2ded41fe8e8fe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f7f580e11d00a75814d2ded41fe8e8fe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f7f580e11d00a75814d2ded41fe8e8fe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f7f580e11d00a75814d2ded41fe8e8fe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f7f580e11d00a75814d2ded41fe8e8fe-Reviews.html", "metareview": "", "pdf_size": 425042, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1532831678132776651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Indiana University Bloomington; Indiana University Bloomington; Indiana University Bloomington", "aff_domain": "indiana.edu;indiana.edu;indiana.edu", "email": "indiana.edu;indiana.edu;indiana.edu", "github": "", "project": "https://arxiv.org/abs/1805.09495", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f7f580e11d00a75814d2ded41fe8e8fe-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Indiana University", "aff_unique_dep": "", "aff_unique_url": "https://www.indiana.edu", "aff_unique_abbr": "IU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Bloomington", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Probabilistic U-Net for Segmentation of Ambiguous Images", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11671", "id": "11671", "author_site": "Simon Kohl, Bernardino Romera-Paredes, Clemens Meyer, Jeffrey De Fauw, Joseph R. Ledsam, Klaus Maier-Hein, S. M. Ali Eslami, Danilo Jimenez Rezende, Olaf Ronneberger", "author": "Simon Kohl; Bernardino Romera-Paredes; Clemens Meyer; Jeffrey De Fauw; Joseph R. Ledsam; Klaus Maier-Hein; S. M. Ali Eslami; Danilo Jimenez Rezende; Olaf Ronneberger", "abstract": "Many real-world vision problems suffer from inherent ambiguities. In clinical applications for example, it might not be clear from a CT scan alone which particular region is cancer tissue. Therefore a group of graders typically produces a set of diverse but plausible segmentations. We consider the task of learning a distribution over segmentations given an input. To this end we propose a generative segmentation model based on a combination of a U-Net with a conditional variational autoencoder that is capable of efficiently producing an unlimited number of plausible hypotheses. We show on a lung abnormalities segmentation task and on a Cityscapes segmentation task that our model reproduces the possible segmentation variants as well as the frequencies with which they occur, doing so significantly better than published approaches. These models could have a high impact in real-world applications, such as being used as clinical decision-making algorithms accounting for multiple plausible semantic segmentation hypotheses to provide possible diagnoses and recommend further actions to resolve the present ambiguities.", "bibtex": "@inproceedings{NEURIPS2018_473447ac,\n author = {Kohl, Simon and Romera-Paredes, Bernardino and Meyer, Clemens and De Fauw, Jeffrey and Ledsam, Joseph R. and Maier-Hein, Klaus and Eslami, S. M. 
Ali and Jimenez Rezende, Danilo and Ronneberger, Olaf},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Probabilistic U-Net for Segmentation of Ambiguous Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/473447ac58e1cd7e96172575f48dca3b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/473447ac58e1cd7e96172575f48dca3b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/473447ac58e1cd7e96172575f48dca3b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/473447ac58e1cd7e96172575f48dca3b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/473447ac58e1cd7e96172575f48dca3b-Reviews.html", "metareview": "", "pdf_size": 1548865, "gs_citation": 705, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17567416838130660215&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "DeepMind, London, UK+Division of Medical Image Computing, German Cancer Research Center, Heidelberg, Germany; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK; Division of Medical Image Computing, German Cancer Research Center, Heidelberg, Germany; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK", "aff_domain": "dkfz.de;dkfz.de;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "dkfz.de;dkfz.de;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 9, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/473447ac58e1cd7e96172575f48dca3b-Abstract.html", "aff_unique_index": "0+1;0;0;0;0;1;0;0;0", "aff_unique_norm": "DeepMind;German Cancer Research Center", "aff_unique_dep": ";Division of Medical Image Computing", "aff_unique_url": "https://deepmind.com;https://www.dkfz.de", "aff_unique_abbr": "DeepMind;DKFZ", "aff_campus_unique_index": "0+1;0;0;0;0;1;0;0;0", "aff_campus_unique": "London;Heidelberg", "aff_country_unique_index": "0+1;0;0;0;0;1;0;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "A Reduction for Efficient LDA Topic Reconstruction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11755", "id": "11755", "author_site": "Matteo Almanza, Flavio Chierichetti, Alessandro Panconesi, Andrea Vattani", "author": "Matteo Almanza; Flavio Chierichetti; Alessandro Panconesi; Andrea Vattani", "abstract": "We present a novel approach for LDA (Latent Dirichlet Allocation) topic reconstruction. The main technical idea is to show that the distribution over the documents generated by LDA can be transformed into a distribution for a much simpler generative model in which documents are generated from {\em the same set of topics} but have a much simpler structure: documents are single topic and topics are chosen uniformly at random. Furthermore, this reduction is approximation preserving, in the sense that approximate distributions-- the only ones we can hope to compute in practice-- are mapped into approximate distributions in the simplified world. This opens up the possibility of efficiently reconstructing LDA topics in a roundabout way. 
Compute an approximate document distribution from the given corpus, transform it into an approximate distribution for the single-topic world, and run a reconstruction algorithm in the uniform, single topic world-- a much simpler task than direct LDA reconstruction. Indeed, we show the viability of the approach by giving very simple algorithms for a generalization of two notable cases that have been studied in the literature, $p$-separability and Gibbs sampling for matrix-like topics.", "bibtex": "@inproceedings{NEURIPS2018_e9257036,\n author = {Almanza, Matteo and Chierichetti, Flavio and Panconesi, Alessandro and Vattani, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Reduction for Efficient LDA Topic Reconstruction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e9257036daf20f062a498aab563d7712-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e9257036daf20f062a498aab563d7712-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e9257036daf20f062a498aab563d7712-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e9257036daf20f062a498aab563d7712-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e9257036daf20f062a498aab563d7712-Reviews.html", "metareview": "", "pdf_size": 444500, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=217929161506400983&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff": "Sapienza University; Sapienza University; Sapienza University; Spiketrap", "aff_domain": "di.uniroma1.it;di.uniroma1.it;di.uniroma1.it;cs.ucsd.edu", "email": "di.uniroma1.it;di.uniroma1.it;di.uniroma1.it;cs.ucsd.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e9257036daf20f062a498aab563d7712-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Sapienza University of Rome;Spiketrap", "aff_unique_dep": ";", "aff_unique_url": "https://www.uniroma1.it;https://www.spiketrap.com", "aff_unique_abbr": "Sapienza;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Italy;Netherlands" }, { "title": "A Retrieve-and-Edit Framework for Predicting Structured Outputs", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11953", "id": "11953", "author_site": "Tatsunori Hashimoto, Kelvin Guu, Yonatan Oren, Percy Liang", "author": "Tatsunori B Hashimoto; Kelvin Guu; Yonatan Oren; Percy Liang", "abstract": "For the task of generating complex outputs such as source code, editing existing outputs can be easier than generating complex outputs from scratch.\nWith this motivation, we propose an approach that first retrieves a training example based on the input (e.g., natural language description) and then edits it to the desired output (e.g., code).\nOur contribution is a computationally efficient method for learning a retrieval model that embeds the input in a task-dependent way without relying on a hand-crafted metric or incurring the expense of jointly training the retriever with the editor.\nOur retrieve-and-edit framework can be applied on top of any base model.\nWe show that on a new autocomplete task for GitHub Python code and the Hearthstone 
cards benchmark, retrieve-and-edit significantly boosts the performance of a vanilla sequence-to-sequence model on both tasks.", "bibtex": "@inproceedings{NEURIPS2018_cd17d3ce,\n author = {Hashimoto, Tatsunori B and Guu, Kelvin and Oren, Yonatan and Liang, Percy S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Retrieve-and-Edit Framework for Predicting Structured Outputs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cd17d3ce3b64f227987cd92cd701cc58-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cd17d3ce3b64f227987cd92cd701cc58-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cd17d3ce3b64f227987cd92cd701cc58-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cd17d3ce3b64f227987cd92cd701cc58-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cd17d3ce3b64f227987cd92cd701cc58-Reviews.html", "metareview": "", "pdf_size": 1458486, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18226674669070722453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Stanford University; Department of Statistics, Stanford University; Department of Computer Science, Stanford University; Department of Computer Science, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cd17d3ce3b64f227987cd92cd701cc58-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Simple Cache Model for Image Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11958", "id": "11958", "author": "Emin Orhan", "abstract": "Training large-scale image recognition models is computationally expensive. This raises the question of whether there might be simple ways to improve the test performance of an already trained model without having to re-train or fine-tune it with new data. Here, we show that, surprisingly, this is indeed possible. The key observation we make is that the layers of a deep network close to the output layer contain independent, easily extractable class-relevant information that is not contained in the output layer itself. We propose to extract this extra class-relevant information using a simple key-value cache memory to improve the classification performance of the model at test time. Our cache memory is directly inspired by a similar cache model previously proposed for language modeling (Grave et al., 2017). This cache component does not require any training or fine-tuning; it can be applied to any pre-trained model and, by properly setting only two hyper-parameters, leads to significant improvements in its classification performance. Improvements are observed across several architectures and datasets. 
In the cache component, using features extracted from layers close to the output (but not from the output layer itself) as keys leads to the largest improvements. Concatenating features from multiple layers to form keys can further improve performance over using single-layer features as keys. The cache component also has a regularizing effect, a simple consequence of which is that it substantially increases the robustness of models against adversarial attacks.", "bibtex": "@inproceedings{NEURIPS2018_6e091746,\n author = {Orhan, Emin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Simple Cache Model for Image Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6e0917469214d8fbd8c517dcdc6b8dcf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6e0917469214d8fbd8c517dcdc6b8dcf-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6e0917469214d8fbd8c517dcdc6b8dcf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6e0917469214d8fbd8c517dcdc6b8dcf-Reviews.html", "metareview": "", "pdf_size": 695567, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3091315690960335000&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Baylor College of Medicine & New York University", "aff_domain": "gmail.com", "email": "gmail.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6e0917469214d8fbd8c517dcdc6b8dcf-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Baylor College of Medicine", "aff_unique_dep": "", "aff_unique_url": "https://www.bcm.edu", "aff_unique_abbr": "BCM", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "A Simple Proximal Stochastic Gradient Method for Nonsmooth Nonconvex Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11543", "id": "11543", "author_site": "Zhize Li, Jian Li", "author": "Zhize Li; Jian Li", "abstract": "We analyze stochastic gradient algorithms for optimizing nonconvex, nonsmooth finite-sum problems. 
In particular, the objective function is given by the sum of a differentiable (possibly nonconvex) component and a possibly non-differentiable but convex component.\nWe propose a proximal stochastic gradient algorithm based on variance reduction, called ProxSVRG+.\nOur main contribution lies in the analysis of ProxSVRG+.\nIt recovers several existing convergence results and improves/generalizes them (in terms of the number of stochastic gradient oracle calls and proximal oracle calls).\nIn particular, ProxSVRG+ generalizes the best results given by the SCSG algorithm, recently proposed by [Lei et al., NIPS'17] for the smooth nonconvex case.\nProxSVRG+ is also more straightforward than SCSG and yields a simpler analysis.\nMoreover, ProxSVRG+ outperforms the deterministic proximal gradient descent (ProxGD) for a wide range of minibatch sizes, which partially solves an open problem proposed in [Reddi et al., NIPS'16].\nAlso, ProxSVRG+ uses far fewer proximal oracle calls than ProxSVRG [Reddi et al., NIPS'16].\nMoreover, for nonconvex functions satisfying the Polyak-\L{}ojasiewicz condition, we prove that ProxSVRG+ achieves a global linear convergence rate without restart, unlike ProxSVRG.\nThus, it can \emph{automatically} switch to the faster linear convergence in some regions as long as the objective function satisfies the PL condition locally in these regions.\nFinally, we conduct several experiments and the experimental results are consistent with the theoretical results.", "bibtex": "@inproceedings{NEURIPS2018_e727fa59,\n author = {Li, Zhize and Li, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Simple Proximal Stochastic Gradient Method for Nonsmooth Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e727fa59ddefcefb5d39501167623132-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e727fa59ddefcefb5d39501167623132-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e727fa59ddefcefb5d39501167623132-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e727fa59ddefcefb5d39501167623132-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e727fa59ddefcefb5d39501167623132-Reviews.html", "metareview": "", "pdf_size": 733877, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12614001023443496903&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "IIIS, Tsinghua University; IIIS, Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e727fa59ddefcefb5d39501167623132-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "A Simple Unified Framework for Detecting Out-of-Distribution Samples and Adversarial Attacks", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2018/poster/11690", "id": "11690", "author_site": "Kimin Lee, Kibok Lee, Honglak Lee, Jinwoo Shin", "author": "Kimin Lee; Kibok Lee; Honglak Lee; Jinwoo Shin", "abstract": "Detecting test samples drawn sufficiently far away from the training distribution statistically or adversarially is a fundamental requirement for deploying a good classifier in many real-world machine learning applications. However, deep neural networks with the softmax classifier are known to produce highly overconfident posterior distributions even for such abnormal samples. In this paper, we propose a simple yet effective method for detecting any abnormal samples, which is applicable to any pre-trained softmax neural classifier. We obtain the class conditional Gaussian distributions with respect to (low- and upper-level) features of the deep models under Gaussian discriminant analysis, which result in a confidence score based on the Mahalanobis distance. While most prior methods have been evaluated for detecting either out-of-distribution or adversarial samples, but not both, the proposed method achieves the state-of-the-art performances for both cases in our experiments. Moreover, we found that our proposed method is more robust in harsh cases, e.g., when the training dataset has noisy labels or small number of samples. Finally, we show that the proposed method enjoys broader usage by applying it to class-incremental learning: whenever out-of-distribution samples are detected, our classification rule can incorporate new classes well without further training deep models.", "bibtex": "@inproceedings{NEURIPS2018_abdeb6f5,\n author = {Lee, Kimin and Lee, Kibok and Lee, Honglak and Shin, Jinwoo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Simple Unified Framework for Detecting Out-of-Distribution Samples and Adversarial Attacks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/abdeb6f575ac5c6676b747bca8d09cc2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/abdeb6f575ac5c6676b747bca8d09cc2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/abdeb6f575ac5c6676b747bca8d09cc2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/abdeb6f575ac5c6676b747bca8d09cc2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/abdeb6f575ac5c6676b747bca8d09cc2-Reviews.html", "metareview": "", "pdf_size": 817949, "gs_citation": 2496, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=59561906500021733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/abdeb6f575ac5c6676b747bca8d09cc2-Abstract.html" }, { "title": "A Smoothed Analysis of the Greedy Algorithm for the Linear Contextual Bandit Problem", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11233", "id": "11233", "author_site": "Sampath Kannan, Jamie Morgenstern, Aaron Roth, Bo Waggoner, Zhiwei Steven Wu", "author": "Sampath Kannan; Jamie H Morgenstern; Aaron Roth; Bo Waggoner; Zhiwei Steven Wu", "abstract": "Bandit learning is characterized by the tension between long-term exploration and short-term exploitation. However, as has recently been noted, in settings in which the choices of the learning algorithm correspond to important decisions about individual people (such as criminal recidivism prediction, lending, and sequential drug trials), exploration corresponds to explicitly sacrificing the well-being of one individual for the potential future benefit of others. In such settings, one might like to run a ``greedy'' algorithm, which always makes the optimal decision for the individuals at hand --- but doing this can result in a catastrophic failure to learn. In this paper, we consider the linear contextual bandit problem and revisit the performance of the greedy algorithm.", "bibtex": "@inproceedings{NEURIPS2018_2cfd4560,\n author = {Kannan, Sampath and Morgenstern, Jamie H and Roth, Aaron and Waggoner, Bo and Wu, Zhiwei Steven},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Smoothed Analysis of the Greedy Algorithm for the Linear Contextual Bandit Problem},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2cfd4560539f887a5e420412b370b361-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2cfd4560539f887a5e420412b370b361-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2cfd4560539f887a5e420412b370b361-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2cfd4560539f887a5e420412b370b361-Reviews.html", "metareview": "", "pdf_size": 346243, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2380485659925756732&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Pennsylvania; Georgia Tech; University of Pennsylvania; Microsoft Research, NYC; University of Minnesota", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "https://arxiv.org/abs/1801.03423", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2cfd4560539f887a5e420412b370b361-Abstract.html", "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University of Pennsylvania;Georgia Institute of Technology;Microsoft;University of Minnesota", "aff_unique_dep": ";;Research;", "aff_unique_url": "https://www.upenn.edu;https://www.gatech.edu;https://www.microsoft.com/en-us/research;https://www.minnesota.edu", "aff_unique_abbr": "UPenn;Georgia Tech;MSR;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Smoother Way to Train Structured Prediction Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11468", "id": "11468", "author_site": "Krishna Pillutla, Vincent Roulet, Sham Kakade, Zaid Harchaoui", "author": "Venkata Krishna Pillutla; Vincent Roulet; Sham M. Kakade; Zaid Harchaoui", "abstract": "We present a framework to train a structured prediction model by performing smoothing on the inference algorithm it builds upon. Smoothing overcomes the non-smoothness inherent to the maximum margin structured prediction objective, and paves the way for the use of fast primal gradient-based optimization algorithms. We illustrate the proposed framework by developing a novel primal incremental optimization algorithm for the structural support vector machine. The proposed algorithm blends an extrapolation scheme for acceleration and an adaptive smoothing scheme and builds upon the stochastic variance-reduced gradient algorithm. We establish its worst-case global complexity bound and study several practical variants. We present experimental results on two real-world problems, namely named entity recognition and visual object localization. The experimental results show that the proposed framework allows us to build upon efficient inference algorithms to develop large-scale optimization algorithms for structured prediction which can achieve competitive performance on the two real-world problems.", "bibtex": "@inproceedings{NEURIPS2018_6211080f,\n author = {Pillutla, Venkata Krishna and Roulet, Vincent and Kakade, Sham M and Harchaoui, Zaid},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Smoother Way to Train Structured Prediction Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6211080fa89981f66b1a0c9d55c61d0f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6211080fa89981f66b1a0c9d55c61d0f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6211080fa89981f66b1a0c9d55c61d0f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6211080fa89981f66b1a0c9d55c61d0f-Reviews.html", "metareview": "", "pdf_size": 3755790, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9176087356126828757&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Paul G. Allen School of Computer Science & Engineering and Department of Statistics, University of Washington; Paul G. Allen School of Computer Science & Engineering and Department of Statistics, University of Washington; Paul G. Allen School of Computer Science & Engineering and Department of Statistics, University of Washington; Paul G. Allen School of Computer Science & Engineering and Department of Statistics, University of Washington", "aff_domain": "uw.edu; ; ; ", "email": "uw.edu; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6211080fa89981f66b1a0c9d55c61d0f-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Paul G. Allen School of Computer Science & Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Spectral View of Adversarially Robust Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11961", "id": "11961", "author_site": "Shivam Garg, Vatsal Sharan, Brian Zhang, Gregory Valiant", "author": "Shivam Garg; Vatsal Sharan; Brian Zhang; Gregory Valiant", "abstract": "Given the apparent difficulty of learning models that are robust to adversarial perturbations, we propose tackling the simpler problem of developing adversarially robust features. Specifically, given a dataset and metric of interest, the goal is to return a function (or multiple functions) that 1) is robust to adversarial perturbations, and 2) has significant variation across the datapoints. We establish strong connections between adversarially robust features and a natural spectral property of the geometry of the dataset and metric of interest. This connection can be leveraged to provide both robust features, and a lower bound on the robustness of any function that has significant variance across the dataset. Finally, we provide empirical evidence that the adversarially robust features given by this spectral approach can be fruitfully leveraged to learn a robust (and accurate) model.", "bibtex": "@inproceedings{NEURIPS2018_033cc385,\n author = {Garg, Shivam and Sharan, Vatsal and Zhang, Brian and Valiant, Gregory},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Spectral View of Adversarially Robust Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/033cc385728c51d97360020ed57776f0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/033cc385728c51d97360020ed57776f0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/033cc385728c51d97360020ed57776f0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/033cc385728c51d97360020ed57776f0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/033cc385728c51d97360020ed57776f0-Reviews.html", "metareview": "", "pdf_size": 445085, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16528040493715712027&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/033cc385728c51d97360020ed57776f0-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Statistical Recurrent Model on the Manifold of Symmetric Positive Definite Matrices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11848", "id": "11848", "author_site": "Rudrasis Chakraborty, Chun-Hao Yang, Xingjian Zhen, Monami Banerjee, Derek Archer, David Vaillancourt, Vikas Singh, Baba C Vemuri", "author": "Rudrasis Chakraborty; Chun-Hao Yang; Xingjian Zhen; Monami Banerjee; Derek Archer; David Vaillancourt; Vikas Singh; Baba Vemuri", "abstract": "In a number of disciplines, the data (e.g., graphs, manifolds) to be\nanalyzed are non-Euclidean in nature. Geometric deep learning\ncorresponds to techniques that generalize deep neural network models\nto such non-Euclidean spaces. Several recent papers have shown how\nconvolutional neural networks (CNNs) can be extended to learn with\ngraph-based data. In this work, we study the setting where the data\n(or measurements) are ordered, longitudinal or temporal in nature and\nlive on a Riemannian manifold -- this setting is common in a variety\nof problems in statistical machine learning, vision and medical\nimaging. We show how statistical recurrent network models\ncan be defined in such spaces. We give an efficient algorithm and\nconduct a rigorous analysis of its statistical properties. We perform\nextensive numerical experiments demonstrating competitive performance\nwith state of the art methods but with a significantly smaller number of\nparameters. 
We also show applications to a statistical analysis task\nin brain imaging, a regime where deep neural network models have only\nbeen utilized in limited ways.", "bibtex": "@inproceedings{NEURIPS2018_7070f908,\n author = {Chakraborty, Rudrasis and Yang, Chun-Hao and Zhen, Xingjian and Banerjee, Monami and Archer, Derek and Vaillancourt, David and Singh, Vikas and Vemuri, Baba},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Statistical Recurrent Model on the Manifold of Symmetric Positive Definite Matrices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7070f9088e456682f0f84f815ebda761-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7070f9088e456682f0f84f815ebda761-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7070f9088e456682f0f84f815ebda761-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7070f9088e456682f0f84f815ebda761-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7070f9088e456682f0f84f815ebda761-Reviews.html", "metareview": "", "pdf_size": 1199452, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5544428600595730510&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;;;;;;", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7070f9088e456682f0f84f815ebda761-Abstract.html" }, { "title": "A Stein variational Newton method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11874", "id": "11874", "author_site": "Gianluca Detommaso, Tiangang Cui, Youssef Marzouk, Alessio Spantini, Robert Scheichl", "author": "Gianluca Detommaso; Tiangang Cui; Youssef Marzouk; Alessio Spantini; Robert Scheichl", "abstract": "Stein variational gradient descent (SVGD) was recently proposed as a general purpose nonparametric variational inference algorithm: it minimizes the Kullback\u2013Leibler divergence between the target distribution and its approximation by implementing a form of functional gradient descent on a reproducing kernel Hilbert space [Liu & Wang, NIPS 2016]. In this paper, we accelerate and generalize the SVGD algorithm by including second-order information, thereby approximating a Newton-like iteration in function space. We also show how second-order information can lead to more effective choices of kernel. We observe significant computational gains over the original SVGD algorithm in multiple test cases.", "bibtex": "@inproceedings{NEURIPS2018_fdaa09fc,\n author = {Detommaso, Gianluca and Cui, Tiangang and Marzouk, Youssef and Spantini, Alessio and Scheichl, Robert},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Stein variational Newton method},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fdaa09fc5ed18d3226b3a1a00f1bc48c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fdaa09fc5ed18d3226b3a1a00f1bc48c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fdaa09fc5ed18d3226b3a1a00f1bc48c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fdaa09fc5ed18d3226b3a1a00f1bc48c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fdaa09fc5ed18d3226b3a1a00f1bc48c-Reviews.html", "metareview": "", "pdf_size": 471034, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2381223671647654052&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Bath & The Alan Turing Institute; Monash University; Massachusetts Institute of Technology; Massachusetts Institute of Technology; Heidelberg University", "aff_domain": "bath.ac.uk;monash.edu;mit.edu;mit.edu;uni-heidelberg.de", "email": "bath.ac.uk;monash.edu;mit.edu;mit.edu;uni-heidelberg.de", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fdaa09fc5ed18d3226b3a1a00f1bc48c-Abstract.html", "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "University of Bath;Monash University;Massachusetts Institute of Technology;Heidelberg University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.bath.ac.uk;https://www.monash.edu;https://web.mit.edu;https://www.uni-heidelberg.de", "aff_unique_abbr": "Bath;Monash;MIT;Uni Heidelberg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;3", "aff_country_unique": "United Kingdom;Australia;United States;Germany" }, { "title": "A Structured Prediction Approach for Label Ranking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11858", "id": "11858", "author_site": "Anna Korba, Alexandre Garcia, Florence d'Alch\u00e9-Buc", "author": "Anna Korba; Alexandre Garcia; Florence d'Alch\u00e9-Buc", "abstract": "We propose to solve a label ranking problem as a structured output regression task. In this view, we adopt a least square surrogate loss\napproach that solves a supervised learning problem in two steps:\na regression step in a well-chosen feature space and a pre-image (or decoding) step. We use specific feature maps/embeddings for ranking data, which convert any ranking/permutation into a vector representation. These embeddings are all well-tailored for our approach, either by resulting in consistent estimators, or by solving trivially the pre-image problem which is often the bottleneck in structured prediction. Their extension to the case of incomplete or partial rankings is also discussed. Finally, we provide empirical results on synthetic and real-world datasets showing the relevance of our method.", "bibtex": "@inproceedings{NEURIPS2018_b3dd760e,\n author = {Korba, Anna and Garcia, Alexandre and d\\textquotesingle Alch\\'{e}-Buc, Florence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Structured Prediction Approach for Label Ranking},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b3dd760eb02d2e669c604f6b2f1e803f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b3dd760eb02d2e669c604f6b2f1e803f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b3dd760eb02d2e669c604f6b2f1e803f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b3dd760eb02d2e669c604f6b2f1e803f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b3dd760eb02d2e669c604f6b2f1e803f-Reviews.html", "metareview": "", "pdf_size": 364763, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7075820179073932212&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "LTCI, T\u00e9l\u00e9com ParisTech; LTCI, T\u00e9l\u00e9com ParisTech; LTCI, T\u00e9l\u00e9com ParisTech", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "email": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b3dd760eb02d2e669c604f6b2f1e803f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech", "aff_unique_dep": "LTCI", "aff_unique_url": "https://www.telecom-paris.fr", "aff_unique_abbr": "T\u00e9l\u00e9com ParisTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "A Theory-Based Evaluation of Nearest Neighbor Models Put Into Practice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11651", "id": "11651", "author_site": "Hendrik Fichtenberger, Dennis Rohde", "author": "Hendrik Fichtenberger; Dennis Rohde", "abstract": "In the $k$-nearest neighborhood model ($k$-NN), we are given a set of points $P$, and we shall answer queries $q$ by returning the $k$ nearest neighbors of $q$ in $P$ according to some metric. This concept is crucial in many areas of data analysis and data processing, e.g., computer vision, document retrieval and machine learning. Many $k$-NN algorithms have been published and implemented, but often the relation between parameters and accuracy of the computed $k$-NN is not explicit. We study property testing of $k$-NN graphs in theory and evaluate it empirically: given a point set $P \\subset \\mathbb{R}^\\delta$ and a directed graph $G=(P,E)$, is $G$ a $k$-NN graph, i.e., every point $p \\in P$ has outgoing edges to its $k$ nearest neighbors, or is it $\\epsilon$-far from being a $k$-NN graph? Here, $\\epsilon$-far means that one has to change more than an $\\epsilon$-fraction of the edges in order to make $G$ a $k$-NN graph. We develop a randomized algorithm with one-sided error that decides this question, i.e., a property tester for the $k$-NN property, with complexity $O(\\sqrt{n} k^2 / \\epsilon^2)$ measured in terms of the number of vertices and edges it inspects, and we prove a lower bound of $\\Omega(\\sqrt{n / \\epsilon k})$. 
We evaluate our tester empirically on the $k$-NN models computed by various algorithms and show that it can be used to detect $k$-NN models with bad accuracy in significantly less time than the building time of the $k$-NN model.", "bibtex": "@inproceedings{NEURIPS2018_6463c884,\n author = {Fichtenberger, Hendrik and Rohde, Dennis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Theory-Based Evaluation of Nearest Neighbor Models Put Into Practice},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6463c88460bd63bbe256e495c63aa40b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6463c88460bd63bbe256e495c63aa40b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6463c88460bd63bbe256e495c63aa40b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6463c88460bd63bbe256e495c63aa40b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6463c88460bd63bbe256e495c63aa40b-Reviews.html", "metareview": "", "pdf_size": 483374, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2294589173281809682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "TU Dortmund; TU Dortmund", "aff_domain": "tu-dortmund.de;cs.tu-dortmund.de", "email": "tu-dortmund.de;cs.tu-dortmund.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6463c88460bd63bbe256e495c63aa40b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Dortmund", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-dortmund.de", "aff_unique_abbr": "TU Dortmund", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "A Unified Feature Disentangler for Multi-Domain Image Translation and Manipulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11267", "id": "11267", "author_site": "Alexander H. Liu, Yen-Cheng Liu, Yu-Ying Yeh, Frank Wang", "author": "Alexander H. Liu; Yen-Cheng Liu; Yu-Ying Yeh; Yu-Chiang Frank Wang", "abstract": "We present a novel and unified deep learning framework which is capable of learning domain-invariant representation from data across multiple domains. Realized by adversarial training with additional ability to exploit domain-specific information, the proposed network is able to perform continuous cross-domain image translation and manipulation, and produces desirable output images accordingly. In addition, the resulting feature representation exhibits superior performance of unsupervised domain adaptation, which also verifies the effectiveness of the proposed model in learning disentangled features for describing cross-domain data.", "bibtex": "@inproceedings{NEURIPS2018_84438b7a,\n author = {Liu, Alexander H. and Liu, Yen-Cheng and Yeh, Yu-Ying and Wang, Yu-Chiang Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Unified Feature Disentangler for Multi-Domain Image Translation and Manipulation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/84438b7aae55a0638073ef798e50b4ef-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/84438b7aae55a0638073ef798e50b4ef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/84438b7aae55a0638073ef798e50b4ef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/84438b7aae55a0638073ef798e50b4ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/84438b7aae55a0638073ef798e50b4ef-Reviews.html", "metareview": "", "pdf_size": 1344827, "gs_citation": 491, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6007789913986445498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "National Taiwan University, Taiwan; Georgia Institute of Technology, USA; University of California, San Diego, USA; National Taiwan University, Taiwan + MOST Joint Research Center for AI Technology and All Vista Healthcare, Taiwan", "aff_domain": "ntu.edu.tw;gatech.edu;eng.ucsd.edu;ntu.edu.tw", "email": "ntu.edu.tw;gatech.edu;eng.ucsd.edu;ntu.edu.tw", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/84438b7aae55a0638073ef798e50b4ef-Abstract.html", "aff_unique_index": "0;1;2;0+3", "aff_unique_norm": "National Taiwan University;Georgia Institute of Technology;University of California, San Diego;MOST Joint Research Center for AI Technology", "aff_unique_dep": ";;;AI Technology", "aff_unique_url": "https://www.ntu.edu.tw;https://www.gatech.edu;https://ucsd.edu;", "aff_unique_abbr": "NTU;Georgia Tech;UCSD;", "aff_campus_unique_index": "0;2;0+0", "aff_campus_unique": "Taiwan;;San Diego", "aff_country_unique_index": "0;1;1;0+0", "aff_country_unique": "China;United States" }, { "title": "A Unified Framework for Extensive-Form Game Abstraction with Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11084", "id": "11084", "author_site": "Christian Kroer, Tuomas Sandholm", "author": "Christian Kroer; Tuomas Sandholm", "abstract": "Part of", "bibtex": "@inproceedings{NEURIPS2018_aa942ab2,\n author = {Kroer, Christian and Sandholm, Tuomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Unified Framework for Extensive-Form Game Abstraction with Bounds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aa942ab2bfa6ebda4840e7360ce6e7ef-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aa942ab2bfa6ebda4840e7360ce6e7ef-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aa942ab2bfa6ebda4840e7360ce6e7ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aa942ab2bfa6ebda4840e7360ce6e7ef-Reviews.html", "metareview": "", "pdf_size": 736815, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10767367771719411226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Carnegie Mellon University, Pittsburgh, PA 15213; Computer Science Department, Carnegie Mellon University, Pittsburgh, PA 15213", "aff_domain": "cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aa942ab2bfa6ebda4840e7360ce6e7ef-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Unified View of Piecewise Linear Neural Network Verification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11470", "id": "11470", "author_site": "Rudy Bunel, Ilker Turkaslan, Philip Torr, Pushmeet Kohli, Pawan K Mudigonda", "author": "Rudy R Bunel; Ilker Turkaslan; Philip Torr; Pushmeet Kohli; Pawan K Mudigonda", "abstract": "The success of Deep Learning and its potential use in many safety-critical applications has motivated research on formal verification of Neural Network (NN) models. Despite the reputation of learned NN models to behave as black boxes and the theoretical hardness of proving their properties, researchers have been successful in verifying some classes of models by exploiting their piecewise linear structure and taking insights from formal methods such as Satisfiability Modulo Theory. These methods are however still far from scaling to realistic neural networks. To facilitate progress in this crucial area, we make two key contributions. First, we present a unified framework that encompasses previous methods. This analysis results in the identification of new methods that combine the strengths of multiple existing approaches, accomplishing a speedup of two orders of magnitude compared to the previous state of the art. Second, we propose a new data set of benchmarks which includes a collection of previously released testcases. We use the benchmark to provide the first experimental comparison of existing algorithms and identify the factors impacting the hardness of verification problems.", "bibtex": "@inproceedings{NEURIPS2018_be53d253,\n author = {Bunel, Rudy R and Turkaslan, Ilker and Torr, Philip and Kohli, Pushmeet and Mudigonda, Pawan K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Unified View of Piecewise Linear Neural Network Verification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/be53d253d6bc3258a8160556dda3e9b2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/be53d253d6bc3258a8160556dda3e9b2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/be53d253d6bc3258a8160556dda3e9b2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/be53d253d6bc3258a8160556dda3e9b2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/be53d253d6bc3258a8160556dda3e9b2-Reviews.html", "metareview": "", "pdf_size": 532361, "gs_citation": 468, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5109084814333031747&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Oxford; University of Oxford; University of Oxford; Deepmind; University of Oxford + Alan Turing Institute", "aff_domain": "robots.ox.ac.uk;lmh.ox.ac.uk;eng.ox.ac.uk;google.com;robots.ox.ac.uk", "email": "robots.ox.ac.uk;lmh.ox.ac.uk;eng.ox.ac.uk;google.com;robots.ox.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/be53d253d6bc3258a8160556dda3e9b2-Abstract.html", "aff_unique_index": "0;0;0;1;0+2", "aff_unique_norm": "University of Oxford;DeepMind;Alan Turing Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;DeepMind;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "A convex program for bilinear inversion of sparse vectors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11817", "id": "11817", "author_site": "Alireza Aghasi, Ali Ahmed, Paul Hand, Babhru Joshi", "author": "Alireza Aghasi; Ali Ahmed; Paul Hand; Babhru Joshi", "abstract": "We consider the bilinear inverse problem of recovering two vectors, x in R^L and w in R^L, from their entrywise product. We consider the case where x and w have known signs and are sparse with respect to known dictionaries of size K and N, respectively. Here, K and N may be larger than, smaller than, or equal to L. We introduce L1-BranchHull, which is a convex program posed in the natural parameter space and does not require an approximate solution or initialization in order to be stated or solved. We study the case where x and w are S1- and S2-sparse with respect to a random dictionary, with the sparse vectors satisfying an effective sparsity condition, and present a recovery guarantee that depends on the number of measurements as L > Omega((S1+S2)(log(K+N))^2). Numerical experiments verify that the scaling constant in the theorem is not too large. One application of this problem is the sweep distortion removal task in dielectric imaging, where one of the signals is a nonnegative reflectivity, and the other signal lives in a known subspace, for example that given by dominant wavelet coefficients. We also introduce variants of L1-BranchHull for the purposes of tolerating noise and outliers, and of recovering piecewise constant signals. 
We provide an ADMM implementation of these variants and show they can extract piecewise constant behavior from real images.", "bibtex": "@inproceedings{NEURIPS2018_482db0ec,\n author = {Aghasi, Alireza and Ahmed, Ali and Hand, Paul and Joshi, Babhru},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A convex program for bilinear inversion of sparse vectors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/482db0ecc10b8a9984ae850c9ada9899-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/482db0ecc10b8a9984ae850c9ada9899-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/482db0ecc10b8a9984ae850c9ada9899-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/482db0ecc10b8a9984ae850c9ada9899-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/482db0ecc10b8a9984ae850c9ada9899-Reviews.html", "metareview": "", "pdf_size": 3386660, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2379963249657699986&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Georgia State Business School, GSU, GA; Dept. of Electrical Engineering, ITU, Lahore; Dept. of Mathematics and College of Computer and Information Science, Northeastern University, MA; Dept. of Computational and Applied Mathematics, Rice University, TX", "aff_domain": "gsu.edu;itu.edu.pk;northeastern.edu;rice.edu", "email": "gsu.edu;itu.edu.pk;northeastern.edu;rice.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/482db0ecc10b8a9984ae850c9ada9899-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Georgia State University;ITU Lahore;Northeastern University;Rice University", "aff_unique_dep": "J. Mack Robinson College of Business;Dept. of Electrical Engineering;Dept. of Mathematics;Dept. of Computational and Applied Mathematics", "aff_unique_url": "https://www.gsu.edu;https://www.itu.edu.pk;https://www.northeastern.edu;https://www.rice.edu", "aff_unique_abbr": "GSU;ITU;NU;Rice", "aff_campus_unique_index": "0;1;2;3", "aff_campus_unique": "Atlanta;Lahore;MA;Houston", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Pakistan" }, { "title": "A flexible model for training action localization with varying levels of supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11115", "id": "11115", "author_site": "Guilhem Ch\u00e9ron, Jean-Baptiste Alayrac, Ivan Laptev, Cordelia Schmid", "author": "Guilhem Ch\u00e9ron; Jean-Baptiste Alayrac; Ivan Laptev; Cordelia Schmid", "abstract": "Spatio-temporal action detection in videos is typically addressed in a fully-supervised setup with manual annotation of training videos required at every frame. Since such annotation is extremely tedious and prohibits scalability, there is a clear need to minimize the amount of manual supervision. In this work we propose a unifying framework that can handle and combine varying types of less demanding weak supervision. Our model is based on discriminative clustering and integrates different types of supervision as constraints on the optimization. 
We investigate applications of such a model to training setups with alternative supervisory signals ranging from video-level class labels, over temporal points or sparse action bounding boxes, to full per-frame annotation of action bounding boxes. Experiments on the challenging UCF101-24 and DALY datasets demonstrate competitive performance of our method at a fraction of the supervision used by previous methods. The flexibility of our model enables joint learning from data with different levels of annotation. Experimental results demonstrate a significant gain by adding a few fully supervised examples to otherwise weakly labeled videos.", "bibtex": "@inproceedings{NEURIPS2018_53fde96f,\n author = {Ch\\'{e}ron, Guilhem and Alayrac, Jean-Baptiste and Laptev, Ivan and Schmid, Cordelia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A flexible model for training action localization with varying levels of supervision},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/53fde96fcc4b4ce72d7739202324cd49-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/53fde96fcc4b4ce72d7739202324cd49-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/53fde96fcc4b4ce72d7739202324cd49-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/53fde96fcc4b4ce72d7739202324cd49-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/53fde96fcc4b4ce72d7739202324cd49-Reviews.html", "metareview": "", "pdf_size": 721971, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12745987706790622376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/53fde96fcc4b4ce72d7739202324cd49-Abstract.html" }, { "title": "A loss framework for calibrated anomaly detection", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11164", "id": "11164", "author_site": "Aditya Menon, Robert Williamson", "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2912204810691945639&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "author": "Aditya Menon; Robert Williamson", "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/959a557f5f6beb411fd954f3f34b21c3-Abstract.html" }, { "title": "A no-regret generalization of hierarchical softmax to extreme multi-label classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11615", "id": "11615", "author_site": "Marek Wydmuch, Kalina Jasinska-Kobus, Mikhail Kuznetsov, R\u00f3bert Busa-Fekete, Krzysztof Dembczynski", "author": "Marek Wydmuch; Kalina Jasinska; Mikhail Kuznetsov; R\u00f3bert Busa-Fekete; Krzysztof Dembczynski", "abstract": "Extreme multi-label classification (XMLC) is a problem of tagging an instance with a small subset of relevant labels chosen from an extremely large pool of possible labels. Large label spaces can be efficiently handled by organizing labels as a tree, like in the hierarchical softmax (HSM) approach commonly used for multi-class problems. 
In this paper, we investigate probabilistic label trees (PLTs) that have been recently devised for tackling XMLC problems. We show that PLTs are a no-regret multi-label generalization of HSM when precision@$k$ is used as a model evaluation metric. Critically, we prove that the pick-one-label heuristic---a reduction technique from multi-label to multi-class that is routinely used along with HSM---is not consistent in general. We also show that our implementation of PLTs, referred to as extremeText (XT), obtains significantly better results than HSM with the pick-one-label heuristic and XML-CNN, a deep network specifically designed for XMLC problems. Moreover, XT is competitive with many state-of-the-art approaches in terms of statistical performance, model size, and prediction time, which makes it amenable to deployment in an online system.", "bibtex": "@inproceedings{NEURIPS2018_8b838818,\n author = {Wydmuch, Marek and Jasinska, Kalina and Kuznetsov, Mikhail and Busa-Fekete, R\\'{o}bert and Dembczynski, Krzysztof},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A no-regret generalization of hierarchical softmax to extreme multi-label classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8b8388180314a337c9aa3c5aa8e2f37a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8b8388180314a337c9aa3c5aa8e2f37a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8b8388180314a337c9aa3c5aa8e2f37a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8b8388180314a337c9aa3c5aa8e2f37a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8b8388180314a337c9aa3c5aa8e2f37a-Reviews.html", "metareview": "", "pdf_size": 600701, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14171307998042582918&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Institute of Computing Science, Poznan University of Technology, Poland; Institute of Computing Science, Poznan University of Technology, Poland; Yahoo! Research, New York, USA; Yahoo! Research, New York, USA; Institute of Computing Science, Poznan University of Technology, Poland", "aff_domain": "cs.put.poznan.pl;cs.put.poznan.pl;oath.com;oath.com;cs.put.poznan.pl", "email": "cs.put.poznan.pl;cs.put.poznan.pl;oath.com;oath.com;cs.put.poznan.pl", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8b8388180314a337c9aa3c5aa8e2f37a-Abstract.html", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Poznan University of Technology;Yahoo! Research", "aff_unique_dep": "Institute of Computing Science;", "aff_unique_url": "https://www.put.poznan.pl/;https://research.yahoo.com", "aff_unique_abbr": "PUT;Yahoo! 
Res", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Poznan;New York", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Poland;United States" }, { "title": "A probabilistic population code based on neural samples", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11681", "id": "11681", "author_site": "Sabyasachi Shivkumar, Richard Lange, Ankani Chattoraj, Ralf Haefner", "author": "Sabyasachi Shivkumar; Richard Lange; Ankani Chattoraj; Ralf Haefner", "abstract": "Sensory processing is often characterized as implementing probabilistic inference: networks of neurons compute posterior beliefs over unobserved causes given the sensory inputs. How these beliefs are computed and represented by neural responses is much-debated (Fiser et al. 2010, Pouget et al. 2013). A central debate concerns the question of whether neural responses represent samples of latent variables (Hoyer & Hyvarinnen 2003) or parameters of their distributions (Ma et al. 2006) with efforts being made to distinguish between them (Grabska-Barwinska et al. 2013).\nA separate debate addresses the question of whether neural responses are proportionally related to the encoded probabilities (Barlow 1969), or proportional to the logarithm of those probabilities (Jazayeri & Movshon 2006, Ma et al. 2006, Beck et al. 2012). \nHere, we show that these alternatives -- contrary to common assumptions -- are not mutually exclusive and that the very same system can be compatible with all of them.\nAs a central analytical result, we show that modeling neural responses in area V1 as samples from a posterior distribution over latents in a linear Gaussian model of the image implies that those neural responses form a linear Probabilistic Population Code (PPC, Ma et al. 2006). In particular, the posterior distribution over some experimenter-defined variable like \"orientation\" is part of the exponential family with sufficient statistics that are linear in the neural sampling-based firing rates.", "bibtex": "@inproceedings{NEURIPS2018_5401acfe,\n author = {Shivkumar, Sabyasachi and Lange, Richard and Chattoraj, Ankani and Haefner, Ralf},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A probabilistic population code based on neural samples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5401acfe633e6817b508b84d23686743-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5401acfe633e6817b508b84d23686743-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5401acfe633e6817b508b84d23686743-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5401acfe633e6817b508b84d23686743-Reviews.html", "metareview": "", "pdf_size": 559514, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15376232053133296361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Brain and Cognitive Sciences, University of Rochester; Brain and Cognitive Sciences, University of Rochester; Brain and Cognitive Sciences, University of Rochester; Brain and Cognitive Sciences, University of Rochester", "aff_domain": "ur.rochester.edu;ur.rochester.edu;ur.rochester.edu;ur.rochester.edu", "email": "ur.rochester.edu;ur.rochester.edu;ur.rochester.edu;ur.rochester.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5401acfe633e6817b508b84d23686743-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Rochester", "aff_unique_dep": "Brain and Cognitive Sciences", "aff_unique_url": "https://www.rochester.edu", "aff_unique_abbr": "U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A theory on the absence of spurious solutions for nonconvex and nonsmooth optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11253", "id": "11253", "author_site": "Cedric Josz, Yi Ouyang, Richard Zhang, Javad Lavaei, Somayeh Sojoudi", "author": "Cedric Josz; Yi Ouyang; Richard Zhang; Javad Lavaei; Somayeh Sojoudi", "abstract": "We study the set of continuous functions that admit no spurious local optima (i.e., local minima that are not global minima), which we term global functions. They satisfy various powerful properties for analyzing nonconvex and nonsmooth optimization problems. For instance, they satisfy a theorem akin to the fundamental uniform limit theorem in analysis regarding continuous functions. Global functions are also endowed with useful properties regarding the composition of functions and change of variables. Using these new results, we show that a class of non-differentiable nonconvex optimization problems arising in tensor decomposition applications are global functions. This is the first result concerning nonconvex methods for nonsmooth objective functions. Our result provides a theoretical guarantee for the widely-used $\ell_1$ norm to avoid outliers in nonconvex optimization.", "bibtex": "@inproceedings{NEURIPS2018_b3ba8f1b,\n author = {Josz, Cedric and Ouyang, Yi and Zhang, Richard and Lavaei, Javad and Sojoudi, Somayeh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A theory on the absence of spurious solutions for nonconvex and nonsmooth optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b3ba8f1bee1238a2f37603d90b58898d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b3ba8f1bee1238a2f37603d90b58898d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b3ba8f1bee1238a2f37603d90b58898d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b3ba8f1bee1238a2f37603d90b58898d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b3ba8f1bee1238a2f37603d90b58898d-Reviews.html", "metareview": "", "pdf_size": 540160, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17421513347745253262&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "EECS, UC Berkeley; IEOR, UC Berkeley; IEOR, UC Berkeley; IEOR, UC Berkeley; EECS, UC Berkeley", "aff_domain": "gmail.com;gmail.com;berkeley.edu;berkeley.edu;berkeley.edu", "email": "gmail.com;gmail.com;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b3ba8f1bee1238a2f37603d90b58898d-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "ATOMO: Communication-efficient Learning via Atomic Sparsification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11935", "id": "11935", "author_site": "Hongyi Wang, Scott Sievert, Shengchao Liu, Zachary Charles, Dimitris Papailiopoulos, Stephen Wright", "author": "Hongyi Wang; Scott Sievert; Shengchao Liu; Zachary Charles; Dimitris Papailiopoulos; Stephen Wright", "abstract": "Distributed model training suffers from communication overheads due to frequent gradient updates transmitted between compute nodes. To mitigate these overheads, several studies propose the use of sparsified stochastic gradients. We argue that these are facets of a general sparsification method that can operate on any possible atomic decomposition. Notable examples include element-wise, singular value, and Fourier decompositions. We present ATOMO, a general framework for atomic sparsification of stochastic gradients. Given a gradient, an atomic decomposition, and a sparsity budget, ATOMO gives a random unbiased sparsification of the atoms minimizing variance. We show that recent methods such as QSGD and TernGrad are special cases of ATOMO, and that sparsifying the singular value decomposition of neural network gradients, rather than their coordinates, can lead to significantly faster distributed training.", "bibtex": "@inproceedings{NEURIPS2018_33b3214d,\n author = {Wang, Hongyi and Sievert, Scott and Liu, Shengchao and Charles, Zachary and Papailiopoulos, Dimitris and Wright, Stephen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {ATOMO: Communication-efficient Learning via Atomic Sparsification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/33b3214d792caf311e1f00fd22b392c5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/33b3214d792caf311e1f00fd22b392c5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/33b3214d792caf311e1f00fd22b392c5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/33b3214d792caf311e1f00fd22b392c5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/33b3214d792caf311e1f00fd22b392c5-Reviews.html", "metareview": "", "pdf_size": 4334306, "gs_citation": 438, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8287483998499358971&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Sciences; Department of Electrical and Computer Engineering; Department of Electrical and Computer Engineering; Department of Computer Sciences; Department of Computer Sciences; Department of Electrical and Computer Engineering", "aff_domain": "; ; ; ; ; ", "email": "; ; ; ; ; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/33b3214d792caf311e1f00fd22b392c5-Abstract.html", "aff_unique_index": "0;1;1;0;0;1", "aff_unique_norm": "University of Wisconsin-Madison;Unknown Institution", "aff_unique_dep": "Department of Computer Sciences;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.cs.wisc.edu;", "aff_unique_abbr": "UW-Madison;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "A^2-Nets: Double Attention Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11060", "id": "11060", "author_site": "Yunpeng Chen, Yannis Kalantidis, Jianshu Li, Shuicheng Yan, Jiashi Feng", "author": "Yunpeng Chen; Yannis Kalantidis; Jianshu Li; Shuicheng Yan; Jiashi Feng", "abstract": "Learning to capture long-range relations is fundamental to image/video recognition. Existing CNN models generally rely on increasing depth to model such relations, which is highly inefficient. In this work, we propose the \u201cdouble attention block\u201d, a novel component that aggregates and propagates informative global features from the entire spatio-temporal space of input images/videos, enabling subsequent convolution layers to access features from the entire space efficiently. The component is designed with a double attention mechanism in two steps, where the first step gathers features from the entire space into a compact set through second-order attention pooling and the second step adaptively selects and distributes features to each location via another attention. The proposed double attention block is easy to adopt and can be plugged into existing deep neural networks conveniently. We conduct extensive ablation studies and experiments on both image and video recognition tasks for evaluating its performance. On the image recognition task, a ResNet-50 equipped with our double attention blocks outperforms a much larger ResNet-152 architecture on the ImageNet-1k dataset with over 40% fewer parameters and fewer FLOPs. 
On the action recognition task, our proposed model achieves state-of-the-art results on the Kinetics and UCF-101 datasets with significantly higher efficiency than recent works.", "bibtex": "@inproceedings{NEURIPS2018_e1654211,\n author = {Chen, Yunpeng and Kalantidis, Yannis and Li, Jianshu and Yan, Shuicheng and Feng, Jiashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A\\^{}2-Nets: Double Attention Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e165421110ba03099a1c0393373c5b43-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e165421110ba03099a1c0393373c5b43-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e165421110ba03099a1c0393373c5b43-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e165421110ba03099a1c0393373c5b43-Reviews.html", "metareview": "", "pdf_size": 804401, "gs_citation": 734, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13399556478950496959&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "National University of Singapore; Facebook Research; National University of Singapore; Qihoo 360 AI Institute+National University of Singapore; National University of Singapore", "aff_domain": "u.nus.edu;fb.com;u.nus.edu;nus.edu.sg;nus.edu.sg", "email": "u.nus.edu;fb.com;u.nus.edu;nus.edu.sg;nus.edu.sg", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e165421110ba03099a1c0393373c5b43-Abstract.html", "aff_unique_index": "0;1;0;2+0;0", "aff_unique_norm": "National University of Singapore;Meta;Qihoo 360", "aff_unique_dep": ";Facebook Research;AI Institute", "aff_unique_url": "https://www.nus.edu.sg;https://research.facebook.com;https://www.qihoo.net", "aff_unique_abbr": "NUS;FB Research;Qihoo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2+0;0", "aff_country_unique": "Singapore;United States;China" }, { "title": "Accelerated Stochastic Matrix Inversion: General Theory and Speeding up BFGS Rules for Faster Second-Order Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11176", "id": "11176", "author_site": "Robert Gower, Filip Hanzely, Peter Richtarik, Sebastian Stich", "author": "Robert Gower; Filip Hanzely; Peter Richtarik; Sebastian U Stich", "abstract": "We present the first accelerated randomized algorithm for solving linear systems in Euclidean spaces. One essential problem of this type is the matrix inversion problem. In particular, our algorithm can be specialized to invert positive definite matrices in such a way that all iterates (approximate solutions) generated by the algorithm are positive definite matrices themselves. This opens the way for many applications in the field of optimization and machine learning. As an application of our general theory, we develop the first accelerated (deterministic and stochastic) quasi-Newton updates. Our updates lead to provably more aggressive approximations of the inverse Hessian, and to speed-ups over classical non-accelerated rules in numerical experiments. 
Experiments with empirical risk minimization show that our rules can accelerate training of machine learning models.", "bibtex": "@inproceedings{NEURIPS2018_d554f7bb,\n author = {Gower, Robert and Hanzely, Filip and Richtarik, Peter and Stich, Sebastian U},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Accelerated Stochastic Matrix Inversion: General Theory and Speeding up BFGS Rules for Faster Second-Order Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d554f7bb7be44a7267068a7df88ddd20-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d554f7bb7be44a7267068a7df88ddd20-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d554f7bb7be44a7267068a7df88ddd20-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d554f7bb7be44a7267068a7df88ddd20-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d554f7bb7be44a7267068a7df88ddd20-Reviews.html", "metareview": "", "pdf_size": 712731, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2761190856905651969&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 20, "aff": "T\u00e9l\u00e9com ParisTech, Paris, France; KAUST, Thuwal, Saudi Arabia; KAUST, Thuwal, Saudi Arabia + University of Edinburgh + Moscow Institute of Physics and Technology; EPFL, Lausanne, Switzerland", "aff_domain": "telecom-paristech.fr;kaust.edu.sa;kaust.edu.sa;epfl.ch", "email": "telecom-paristech.fr;kaust.edu.sa;kaust.edu.sa;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d554f7bb7be44a7267068a7df88ddd20-Abstract.html", "aff_unique_index": "0;1;1+2+3;4", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;King Abdullah University of Science and Technology;University of Edinburgh;Moscow Institute of Physics and Technology;EPFL", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.telecom-paristech.fr;https://www.kaust.edu.sa;https://www.ed.ac.uk;https://www.mipt.ru/en;https://www.epfl.ch", "aff_unique_abbr": "TP;KAUST;Edinburgh;MIPT;EPFL", "aff_campus_unique_index": "0;1;1;3", "aff_campus_unique": "Paris;Thuwal;;Lausanne", "aff_country_unique_index": "0;1;1+2+3;4", "aff_country_unique": "France;Saudi Arabia;United Kingdom;Russian Federation;Switzerland" }, { "title": "Acceleration through Optimistic No-Regret Dynamics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11381", "id": "11381", "author_site": "Jun-Kun Wang, Jacob Abernethy", "author": "Jun-Kun Wang; Jacob D. Abernethy", "abstract": "We consider the problem of minimizing a smooth convex function by reducing the optimization to computing the Nash equilibrium of a particular zero-sum convex-concave game. Zero-sum games can be solved using online learning dynamics, where a classical technique involves simulating two no-regret algorithms that play against each other and, after $T$ rounds, the average iterate is guaranteed to solve the original optimization problem with error decaying as $O(\\log T/T)$.\nIn this paper we show that the technique can be enhanced to a rate of $O(1/T^2)$ by extending recent work \\cite{RS13,SALS15} that leverages \\textit{optimistic learning} to speed up equilibrium computation. 
The resulting optimization algorithm derived from this analysis coincides \textit{exactly} with the well-known Nesterov accelerated gradient method \cite{N83a}, and indeed the same story allows us to recover several variants of Nesterov's algorithm via small tweaks. We are also able to establish the accelerated linear rate for a function which is both strongly convex and smooth. This methodology unifies a number of different iterative optimization methods: we show that the Heavy Ball algorithm is precisely the non-optimistic variant of Nesterov's accelerated gradient method, and recent prior work already established a similar perspective on Frank-Wolfe \cite{AW17,ALLW18}.", "bibtex": "@inproceedings{NEURIPS2018_e06f967f,\n author = {Wang, Jun-Kun and Abernethy, Jacob D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Acceleration through Optimistic No-Regret Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e06f967fb0d355592be4e7674fa31d26-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e06f967fb0d355592be4e7674fa31d26-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e06f967fb0d355592be4e7674fa31d26-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e06f967fb0d355592be4e7674fa31d26-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e06f967fb0d355592be4e7674fa31d26-Reviews.html", "metareview": "", "pdf_size": 337852, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11800946119053918199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "College of Computing, Georgia Institute of Technology; College of Computing, Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "email": "gatech.edu;gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e06f967fb0d355592be4e7674fa31d26-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "College of Computing", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Active Learning for Non-Parametric Regression Using Purely Random Trees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11262", "id": "11262", "author_site": "Jack Goetz, Ambuj Tewari, Paul Zimmerman", "author": "Jack Goetz; Ambuj Tewari; Paul Zimmerman", "abstract": "Active learning is the task of using labelled data to select additional points to label, with the goal of fitting the most accurate model with a fixed budget of labelled points. In binary classification, active learning is known to produce faster rates than passive learning for a broad range of settings. However, in regression, restrictive structure and tailored methods were previously needed to obtain theoretically superior performance. In this paper we propose an intuitive tree-based active learning algorithm for non-parametric regression with provable improvement over random sampling. 
When implemented with Mondrian Trees, our algorithm is tuning-parameter-free, consistent, and minimax optimal for Lipschitz functions.", "bibtex": "@inproceedings{NEURIPS2018_dc4c44f6,\n author = {Goetz, Jack and Tewari, Ambuj and Zimmerman, Paul},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Active Learning for Non-Parametric Regression Using Purely Random Trees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dc4c44f624d600aa568390f1f1104aa0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dc4c44f624d600aa568390f1f1104aa0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dc4c44f624d600aa568390f1f1104aa0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dc4c44f624d600aa568390f1f1104aa0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dc4c44f624d600aa568390f1f1104aa0-Reviews.html", "metareview": "", "pdf_size": 326552, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7681049792975239576&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Michigan; University of Michigan; University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "email": "umich.edu;umich.edu;umich.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dc4c44f624d600aa568390f1f1104aa0-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Active Matting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11452", "id": "11452", "author_site": "Xin Yang, Ke Xu, Shaozhe Chen, Shengfeng He, Baocai Yin Yin, Rynson Lau", "author": "Xin Yang; Ke Xu; Shaozhe Chen; Shengfeng He; Baocai Yin Yin; Rynson Lau", "abstract": "Image matting is an ill-posed problem. It requires a user input trimap or some strokes to obtain an alpha matte of the foreground object. A fine user input is essential to obtain a good result, which is either time consuming or suitable for experienced users who know where to place the strokes. In this paper, we explore the intrinsic relationship between the user input and the matting algorithm to address the problem of where and when the user should provide the input. Our aim is to discover the most informative sequence of regions for user input in order to produce a good alpha matte with minimum labeling efforts. To this end, we propose an active matting method with recurrent reinforcement learning. The proposed framework involves a human in the loop by sequentially detecting informative regions for trivial human judgement. Compared to traditional matting algorithms, the proposed framework requires much less effort, and can produce satisfactory results with just 10 regions. Through extensive experiments, we show that the proposed model reduces user efforts significantly and achieves comparable performance to dense trimaps in a user-friendly manner. 
We further show that the learned informative knowledge can be generalized across different matting algorithms.", "bibtex": "@inproceedings{NEURIPS2018_653ac11c,\n author = {Yang, Xin and Xu, Ke and Chen, Shaozhe and He, Shengfeng and Yin, Baocai Yin and Lau, Rynson},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Active Matting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/653ac11ca60b3e021a8c609c7198acfc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/653ac11ca60b3e021a8c609c7198acfc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/653ac11ca60b3e021a8c609c7198acfc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/653ac11ca60b3e021a8c609c7198acfc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/653ac11ca60b3e021a8c609c7198acfc-Reviews.html", "metareview": "", "pdf_size": 11368216, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16644807993801550851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Dalian University of Technology+City University of Hong Kong; Dalian University of Technology+City University of Hong Kong; Dalian University of Technology; South China University of Technology; Dalian University of Technology; City University of Hong Kong", "aff_domain": "dlut.edu.cn;mail.dlut.edu.cn;mail.dlut.edu.cn;scut.edu.cn;dlut.edu.cn;cityu.edu.hk", "email": "dlut.edu.cn;mail.dlut.edu.cn;mail.dlut.edu.cn;scut.edu.cn;dlut.edu.cn;cityu.edu.hk", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/653ac11ca60b3e021a8c609c7198acfc-Abstract.html", "aff_unique_index": "0+1;0+1;0;2;0;1", "aff_unique_norm": "Dalian University of Technology;City University of Hong Kong;South China University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.dlut.edu.cn/;https://www.cityu.edu.hk;https://www.scut.edu.cn", "aff_unique_abbr": "DUT;CityU;SCUT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0+0;0+0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Actor-Critic Policy Optimization in Partially Observable Multiagent Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11344", "id": "11344", "author_site": "Sriram Srinivasan, Marc Lanctot, Vinicius Zambaldi, Julien Perolat, Karl Tuyls, Remi Munos, Michael Bowling", "author": "Sriram Srinivasan; Marc Lanctot; Vinicius Zambaldi; Julien Perolat; Karl Tuyls; Remi Munos; Michael Bowling", "abstract": "Optimization of parameterized policies for reinforcement learning (RL) is an important and challenging problem in artificial intelligence. Among the most common approaches are algorithms based on gradient ascent of a score function representing discounted return. In this paper, we examine the role of these policy gradient and actor-critic algorithms in partially-observable multiagent environments. We show several candidate policy update rules and relate them to a foundation of regret minimization and multiagent learning techniques for the one-shot and tabular cases, leading to previously unknown convergence guarantees. 
We apply our method to model-free multiagent reinforcement learning in adversarial sequential decision problems (zero-sum imperfect information games), using RL-style function approximation. We evaluate on commonly used benchmark Poker domains, showing performance against fixed policies and empirical convergence to approximate Nash equilibria in self-play with rates similar to or better than a baseline model-free algorithm for zero-sum games, without any domain-specific state space reductions.", "bibtex": "@inproceedings{NEURIPS2018_e22dd5da,\n author = {Srinivasan, Sriram and Lanctot, Marc and Zambaldi, Vinicius and Perolat, Julien and Tuyls, Karl and Munos, Remi and Bowling, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Actor-Critic Policy Optimization in Partially Observable Multiagent Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e22dd5dabde45eda5a1a67772c8e25dd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e22dd5dabde45eda5a1a67772c8e25dd-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e22dd5dabde45eda5a1a67772c8e25dd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e22dd5dabde45eda5a1a67772c8e25dd-Reviews.html", "metareview": "", "pdf_size": 1161201, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8096003745039146783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e22dd5dabde45eda5a1a67772c8e25dd-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Adaptation to Easy Data in Prediction with Limited Advice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11297", "id": "11297", "author_site": "Tobias Sommer Thune, Yevgeny Seldin", "author": "Tobias Sommer Thune; Yevgeny Seldin", "abstract": "We derive an online learning algorithm with improved regret guarantees for ``easy'' loss sequences. We consider two types of ``easiness'': (a) stochastic loss sequences and (b) adversarial loss sequences with small effective range of the losses. While a number of algorithms have been proposed for exploiting small effective range in the full information setting, Gerchinovitz and Lattimore [2016] have shown the impossibility of regret scaling with the effective range of the losses in the bandit setting. We show that just one additional observation per round is sufficient to circumvent the impossibility result. The proposed Second Order Difference Adjustments (SODA) algorithm requires no prior knowledge of the effective range of the losses, $\\varepsilon$, and achieves an $O(\\varepsilon \\sqrt{KT \\ln K}) + \\tilde{O}(\\varepsilon K \\sqrt[4]{T})$ expected regret guarantee, where $T$ is the time horizon and $K$ is the number of actions. 
The scaling with the effective loss range is achieved under significantly weaker assumptions than those made by Cesa-Bianchi and Shamir [2018] in an earlier attempt to circumvent the impossibility result. We also provide a regret lower bound of $\\Omega(\\varepsilon\\sqrt{T K})$, which almost matches the upper bound. In addition, we show that in the stochastic setting SODA achieves an $O\\left(\\sum_{a:\\Delta_a>0} \\frac{K\\varepsilon^2}{\\Delta_a}\\right)$ pseudo-regret bound that holds simultaneously with the adversarial regret guarantee. In other words, SODA is safe against an unrestricted oblivious adversary and provides improved regret guarantees for at least two different types of ``easiness'' simultaneously.", "bibtex": "@inproceedings{NEURIPS2018_253f7b5d,\n author = {Thune, Tobias Sommer and Seldin, Yevgeny},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptation to Easy Data in Prediction with Limited Advice},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/253f7b5d921338af34da817c00f42753-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/253f7b5d921338af34da817c00f42753-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/253f7b5d921338af34da817c00f42753-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/253f7b5d921338af34da817c00f42753-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/253f7b5d921338af34da817c00f42753-Reviews.html", "metareview": "", "pdf_size": 370127, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14434004524212668841&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science, University of Copenhagen; Department of Computer Science, University of Copenhagen", "aff_domain": "di.ku.dk;di.ku.dk", "email": "di.ku.dk;di.ku.dk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/253f7b5d921338af34da817c00f42753-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Adapted Deep Embeddings: A Synthesis of Methods for k-Shot Inductive Transfer Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11035", "id": "11035", "author_site": "Tyler Scott, Karl Ridgeway, Michael Mozer", "author": "Tyler Scott; Karl Ridgeway; Michael Mozer", "abstract": "The focus in machine learning has branched beyond training classifiers on a single task to investigating how previously acquired knowledge in a source domain can be leveraged to facilitate learning in a related target domain, known as inductive transfer learning. Three active lines of research have independently explored transfer learning using neural networks. In weight transfer, a model trained on the source domain is used as an initialization point for a network to be trained on the target domain. 
In deep metric learning, the source domain is used to construct an embedding that captures class structure in both the source and target domains. In few-shot learning, the focus is on generalizing well in the target domain based on a limited number of labeled examples. We compare state-of-the-art methods from these three paradigms and also explore hybrid adapted-embedding methods that use limited target-domain data to fine-tune embeddings constructed from source-domain data. We conduct a systematic comparison of methods in a variety of domains, varying the number of labeled instances available in the target domain (k), as well as the number of target-domain classes. We reach three principal conclusions: (1) Deep embeddings are far superior, compared to weight transfer, as a starting point for inter-domain transfer or model re-use. (2) Our hybrid methods robustly outperform every few-shot learning and every deep metric learning method previously proposed, with a mean error reduction of 34% over the state of the art. (3) Among loss functions for discovering embeddings, the histogram loss (Ustinova & Lempitsky, 2016) is most robust. We hope our results will motivate a unification of research in weight transfer, deep metric learning, and few-shot learning.", "bibtex": "@inproceedings{NEURIPS2018_d09bf415,\n author = {Scott, Tyler and Ridgeway, Karl and Mozer, Michael C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adapted Deep Embeddings: A Synthesis of Methods for k-Shot Inductive Transfer Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d09bf41544a3365a46c9077ebb5e35c3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d09bf41544a3365a46c9077ebb5e35c3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d09bf41544a3365a46c9077ebb5e35c3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d09bf41544a3365a46c9077ebb5e35c3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d09bf41544a3365a46c9077ebb5e35c3-Reviews.html", "metareview": "", "pdf_size": 404343, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11224359097846918125&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, University of Colorado, Boulder; Department of Computer Science, University of Colorado, Boulder; Department of Computer Science, University of Colorado, Boulder", "aff_domain": "colorado.edu;colorado.edu;colorado.edu", "email": "colorado.edu;colorado.edu;colorado.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d09bf41544a3365a46c9077ebb5e35c3-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Colorado", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.colorado.edu", "aff_unique_abbr": "CU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Boulder", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Learning with Unknown Information Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11719", "id": "11719", "author_site": "Yonatan Gur, Ahmadreza Momeni", "author": "Yonatan Gur; 
Ahmadreza Momeni", "abstract": "An agent facing sequential decisions that are characterized by partial feedback needs to strike a balance between maximizing immediate payoffs based on available information, and acquiring new information that may be essential for maximizing future payoffs. This trade-off is captured by the multi-armed bandit (MAB) framework that has been studied and applied when at each time epoch payoff observations are collected on the actions that are selected at that epoch. In this paper we introduce a new, generalized MAB formulation in which additional information on each arm may appear arbitrarily throughout the decision horizon, and study the impact of such information flows on the achievable performance and the design of efficient decision-making policies. By obtaining matching lower and upper bounds, we characterize the (regret) complexity of this family of MAB problems as a function of the information flows. We introduce an adaptive exploration policy that, without any prior knowledge of the information arrival process, attains the best performance (in terms of regret rate) that is achievable when the information arrival process is a priori known. Our policy uses dynamically customized virtual time indexes to endogenously control the exploration rate based on the realized information arrival process.", "bibtex": "@inproceedings{NEURIPS2018_9e740b84,\n author = {Gur, Yonatan and Momeni, Ahmadreza},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Learning with Unknown Information Flows},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9e740b84bb48a64dde25061566299467-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9e740b84bb48a64dde25061566299467-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9e740b84bb48a64dde25061566299467-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9e740b84bb48a64dde25061566299467-Reviews.html", "metareview": "", "pdf_size": 363065, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4185491627339331957&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Graduate School of Business, Stanford University; Electrical Engineering Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9e740b84bb48a64dde25061566299467-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Graduate School of Business", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Methods for Nonconvex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11930", "id": "11930", "author_site": "Manzil Zaheer, Sashank Reddi, Devendra S Sachan, Satyen Kale, Sanjiv Kumar", "author": "Manzil Zaheer; Sashank Reddi; Devendra Sachan; Satyen Kale; Sanjiv Kumar", "abstract": "Adaptive gradient methods that rely on scaling gradients down by the square root of 
exponential moving averages of past squared gradients, such as RMSProp, Adam, and Adadelta, have found wide application in optimizing the nonconvex problems that arise in deep learning. However, it has been recently demonstrated that such methods can fail to converge even in simple convex optimization settings. In this work, we provide a new analysis of such methods applied to nonconvex stochastic optimization problems, characterizing the effect of increasing minibatch size. Our analysis shows that under this scenario such methods do converge to stationarity up to the statistical limit of variance in the stochastic gradients (scaled by a constant factor). In particular, our result implies that increasing minibatch sizes enables convergence, thus providing a way to circumvent the non-convergence issues. Furthermore, we provide a new adaptive optimization algorithm, Yogi, which controls the increase in effective learning rate, leading to even better performance with similar theoretical guarantees on convergence. Extensive experiments show that Yogi with very little hyperparameter tuning outperforms methods such as Adam in several challenging machine learning tasks.", "bibtex": "@inproceedings{NEURIPS2018_90365351,\n author = {Zaheer, Manzil and Reddi, Sashank and Sachan, Devendra and Kale, Satyen and Kumar, Sanjiv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Methods for Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/90365351ccc7437a1309dc64e4db32a3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/90365351ccc7437a1309dc64e4db32a3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/90365351ccc7437a1309dc64e4db32a3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/90365351ccc7437a1309dc64e4db32a3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/90365351ccc7437a1309dc64e4db32a3-Reviews.html", "metareview": "", "pdf_size": 478506, "gs_citation": 523, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13576720529696525340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Google Research; Google Research; Carnegie Mellon University; Google Research; Google Research", "aff_domain": "google.com;google.com;andrew.cmu.edu;google.com;google.com", "email": "google.com;google.com;andrew.cmu.edu;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/90365351ccc7437a1309dc64e4db32a3-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Google;Carnegie Mellon University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.cmu.edu", "aff_unique_abbr": "Google Research;CMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Negative Curvature Descent with Applications in Non-convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11476", "id": "11476", "author_site": "Mingrui Liu, Zhe Li, Xiaoyu Wang, Jinfeng Yi, Tianbao Yang", "author": "Mingrui Liu; Zhe Li; Xiaoyu Wang; Jinfeng Yi; Tianbao Yang", 
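For the Yogi entry above, a minimal NumPy sketch of the update may help: Yogi differs from Adam only in how the second-moment estimate v moves (additively, via a sign term), which caps how fast the effective learning rate can grow. The toy objective, noise level, and hyperparameters below are illustrative, not the paper's experimental setup.

```python
import numpy as np

def yogi_step(x, m, v, grad, lr=0.01, beta1=0.9, beta2=0.999, eps=1e-3):
    """One Yogi update. Adam would set v = beta2*v + (1-beta2)*grad**2;
    Yogi instead moves v additively, controlling effective-LR growth."""
    g2 = grad ** 2
    m = beta1 * m + (1 - beta1) * grad
    v = v - (1 - beta2) * np.sign(v - g2) * g2   # Yogi's key change vs Adam
    x = x - lr * m / (np.sqrt(v) + eps)
    return x, m, v

# Toy problem: minimize f(x) = 0.5*||x||^2 from noisy gradients (illustrative).
rng = np.random.default_rng(0)
x = rng.normal(size=5)
m, v = np.zeros_like(x), np.zeros_like(x)
for _ in range(2000):
    grad = x + 0.1 * rng.normal(size=5)          # stochastic gradient
    x, m, v = yogi_step(x, m, v, grad)
print(np.linalg.norm(x))  # small: iterate near the minimum, up to noise level
```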
"abstract": "Negative curvature descent (NCD) method has been utilized to design deterministic or stochastic algorithms for non-convex optimization aiming at finding second-order stationary points or local minima. In existing studies, NCD needs to approximate the smallest eigen-value of the Hessian matrix with a sufficient precision (e.g., $\\epsilon_2\\ll 1$) in order to achieve a sufficiently accurate second-order stationary solution (i.e., $\\lambda_{\\min}(\\nabla^2 f(\\x))\\geq -\\epsilon_2)$. One issue with this approach is that the target precision $\\epsilon_2$ is usually set to be very small in order to find a high quality solution, which increases the complexity for computing a negative curvature. To address this issue, we propose an adaptive NCD to allow for an adaptive error dependent on the current gradient's magnitude in approximating the smallest eigen-value of the Hessian, and to encourage competition between a noisy NCD step and gradient descent step. We consider the applications of the proposed adaptive NCD for both deterministic and stochastic non-convex optimization, and demonstrate that it can help reduce the the overall complexity in computing the negative curvatures during the course of optimization without sacrificing the iteration complexity.", "bibtex": "@inproceedings{NEURIPS2018_f52854cc,\n author = {Liu, Mingrui and Li, Zhe and Wang, Xiaoyu and Yi, Jinfeng and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Negative Curvature Descent with Applications in Non-convex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f52854cc99ae1c1966b0a21d0127975b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f52854cc99ae1c1966b0a21d0127975b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f52854cc99ae1c1966b0a21d0127975b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f52854cc99ae1c1966b0a21d0127975b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f52854cc99ae1c1966b0a21d0127975b-Reviews.html", "metareview": "", "pdf_size": 365696, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14336692498209457571&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA; Intellifusion; JD AI Research; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA", "aff_domain": "uiowa.edu;uiowa.edu; ;jd.com;uiowa.edu", "email": "uiowa.edu;uiowa.edu; ;jd.com;uiowa.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f52854cc99ae1c1966b0a21d0127975b-Abstract.html", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Iowa;Intellifusion;JD", "aff_unique_dep": "Department of Computer Science;;JD AI Research", "aff_unique_url": "https://www.uiowa.edu;https://www.intellifusion.com/;https://www.jd.com", "aff_unique_abbr": "UIowa;;JD AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Iowa City;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;China" }, { "title": "Adaptive Online 
Learning in Dynamic Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11149", "id": "11149", "author_site": "Lijun Zhang, Shiyin Lu, Zhi-Hua Zhou", "author": "Lijun Zhang; Shiyin Lu; Zhi-Hua Zhou", "abstract": "In this paper, we study online convex optimization in dynamic environments, and aim to bound the dynamic regret with respect to any sequence of comparators. Existing work has shown that online gradient descent enjoys an $O(\\sqrt{T}(1+P_T))$ dynamic regret, where $T$ is the number of iterations and $P_T$ is the path-length of the comparator sequence. However, this result is unsatisfactory, as there exists a large gap from the $\\Omega(\\sqrt{T(1+P_T)})$ lower bound established in our paper. To address this limitation, we develop a novel online method, namely adaptive learning for dynamic environment (Ader), which achieves an optimal $O(\\sqrt{T(1+P_T)})$ dynamic regret. The basic idea is to maintain a set of experts, each attaining an optimal dynamic regret for a specific path-length, and to combine them with an expert-tracking algorithm. Furthermore, we propose an improved Ader based on the surrogate loss, and in this way the number of gradient evaluations per round is reduced from $O(\\log T)$ to $1$. Finally, we extend Ader to the setting where a sequence of dynamical models is available to characterize the comparators.", "bibtex": "@inproceedings{NEURIPS2018_10a5ab2d,\n author = {Zhang, Lijun and Lu, Shiyin and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Online Learning in Dynamic Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/10a5ab2db37feedfdeaab192ead4ac0e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/10a5ab2db37feedfdeaab192ead4ac0e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/10a5ab2db37feedfdeaab192ead4ac0e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/10a5ab2db37feedfdeaab192ead4ac0e-Reviews.html", "metareview": "", "pdf_size": 287096, "gs_citation": 223, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12372423789161561659&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/10a5ab2db37feedfdeaab192ead4ac0e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology", "aff_unique_url": "http://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Nanjing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Adaptive Path-Integral Autoencoders: Representation Learning and Planning for Dynamical Systems", "status": 
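For the Ader entry above, a rough NumPy sketch of the construction: experts running projected OGD over a geometric grid of step sizes, combined by a Hedge-style meta-algorithm on surrogate (linearized) losses so a single gradient per round suffices. The grid, meta learning rate, and unit-ball domain are illustrative choices, not the paper's tuned values.

```python
import numpy as np

def ader_sketch(grad_fns, x0, etas, meta_lr=1.0, radius=1.0):
    """Illustrative Ader-style method: each expert i runs projected online
    gradient descent with its own step size etas[i]; a Hedge meta-layer
    tracks whichever expert suits the (unknown) comparator path length."""
    n = len(etas)
    xs = np.tile(x0, (n, 1))                 # one iterate per expert
    w = np.ones(n) / n                       # meta weights over experts
    played = []
    for grad_fn in grad_fns:
        x_play = w @ xs                      # play the weighted combination
        played.append(x_play)
        g = grad_fn(x_play)                  # single gradient evaluation
        w *= np.exp(-meta_lr * (xs @ g))     # Hedge update on surrogate losses
        w /= w.sum()
        xs -= np.outer(etas, g)              # surrogate OGD step per expert
        norms = np.linalg.norm(xs, axis=1, keepdims=True)
        xs *= radius / np.maximum(norms, radius)  # project back onto the ball
    return played

# Hypothetical usage: track a drifting target c_t under f_t(x) = 0.5||x - c_t||^2.
targets = [np.array([np.sin(t / 10), np.cos(t / 10)]) for t in range(100)]
grad_fns = [lambda x, c=c: x - c for c in targets]
etas = [0.05 * 2 ** k for k in range(6)]     # geometric grid of step sizes
print(ader_sketch(grad_fns, np.zeros(2), etas)[-1])  # ends near recent targets
```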
"Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11852", "id": "11852", "author_site": "Jung-Su Ha, Young-Jin Park, Hyeok-Joo Chae, Soon-Seo Park, Han-Lim Choi", "author": "Jung-Su Ha; Young-Jin Park; Hyeok-Joo Chae; Soon-Seo Park; Han-Lim Choi", "abstract": "We present a representation learning algorithm that learns a low-dimensional latent dynamical system from high-dimensional sequential raw data, e.g., video. The framework builds upon recent advances in amortized inference methods that use both an inference network and a refinement procedure to output samples from a variational distribution given an observation sequence, and takes advantage of the duality between control and inference to approximately solve the intractable inference problem using the path integral control approach. The learned dynamical model can be used to predict and plan the future states; we also present the efficient planning method that exploits the learned low-dimensional latent dynamics. Numerical experiments show that the proposed path-integral control based variational inference method leads to tighter lower bounds in statistical model learning of sequential data. Supplementary video: https://youtu.be/xCp35crUoLQ", "bibtex": "@inproceedings{NEURIPS2018_aa0d2a80,\n author = {Ha, Jung-Su and Park, Young-Jin and Chae, Hyeok-Joo and Park, Soon-Seo and Choi, Han-Lim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Path-Integral Autoencoders: Representation Learning and Planning for Dynamical Systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aa0d2a804a3510442f2fd40f2100b054-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aa0d2a804a3510442f2fd40f2100b054-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aa0d2a804a3510442f2fd40f2100b054-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aa0d2a804a3510442f2fd40f2100b054-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aa0d2a804a3510442f2fd40f2100b054-Reviews.html", "metareview": "", "pdf_size": 2907955, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9836069857596360357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Aerospace Engineering & KI for Robotics, KAIST; Department of Aerospace Engineering & KI for Robotics, KAIST; Department of Aerospace Engineering & KI for Robotics, KAIST; Department of Aerospace Engineering & KI for Robotics, KAIST; Department of Aerospace Engineering & KI for Robotics, KAIST", "aff_domain": "lics.;lics.;lics.;lics.;kaist.ac.kr", "email": "lics.;lics.;lics.;lics.;kaist.ac.kr", "github": "https://github.com/yjparkLiCS/18-NeurIPS-APIAE", "project": "https://youtu.be/xCp35crUoLQ", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aa0d2a804a3510442f2fd40f2100b054-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "KAIST", "aff_unique_dep": "Department of Aerospace Engineering & KI for Robotics", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Adaptive Sampling Towards Fast Graph 
Representation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11449", "id": "11449", "author_site": "Wenbing Huang, Tong Zhang, Yu Rong, Junzhou Huang", "author": "Wenbing Huang; Tong Zhang; Yu Rong; Junzhou Huang", "abstract": "Graph Convolutional Networks (GCNs) have become a crucial tool for learning representations of graph vertices. The main challenge in adapting GCNs to large-scale graphs is scalability: they incur heavy computation and memory costs due to uncontrollable neighborhood expansion across layers. In this paper, we accelerate the training of GCNs by developing an adaptive layer-wise sampling method. By constructing the network layer by layer in a top-down manner, we sample the lower layer conditioned on the top one, where the sampled neighborhoods are shared by different parent nodes and over-expansion is avoided owing to the fixed-size sampling. More importantly, the proposed sampler is adaptive and amenable to explicit variance reduction, which in turn enhances the training of our method. Furthermore, we propose a novel and economical approach to promote message passing over distant nodes by applying skip connections. Intensive experiments on several benchmarks verify the effectiveness of our method regarding the classification accuracy while enjoying faster convergence speed.", "bibtex": "@inproceedings{NEURIPS2018_01eee509,\n author = {Huang, Wenbing and Zhang, Tong and Rong, Yu and Huang, Junzhou},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Sampling Towards Fast Graph Representation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/01eee509ee2f68dc6014898c309e86bf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/01eee509ee2f68dc6014898c309e86bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/01eee509ee2f68dc6014898c309e86bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/01eee509ee2f68dc6014898c309e86bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/01eee509ee2f68dc6014898c309e86bf-Reviews.html", "metareview": "", "pdf_size": 463939, "gs_citation": 640, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16083258165432188184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Tencent AI Lab; Australian National University; Tencent AI Lab; Tencent AI Lab", "aff_domain": "126.com;anu.edu.au;hotmail.com;tencent.com", "email": "126.com;anu.edu.au;hotmail.com;tencent.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/01eee509ee2f68dc6014898c309e86bf-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tencent;Australian National University", "aff_unique_dep": "Tencent AI Lab;", "aff_unique_url": "https://ai.tencent.com;https://www.anu.edu.au", "aff_unique_abbr": "Tencent AI Lab;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Australia" }, { "title": "Adaptive Skip Intervals: Temporal Abstraction for Recurrent Dynamical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11932", "id": "11932", 
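For the adaptive sampling entry above, a schematic of the top-down, layer-wise sampler: each lower layer is a single fixed-size node set drawn conditioned on the layer above and shared across all parents, so the receptive field grows additively rather than multiplicatively. The uniform proposal below is a stand-in for the paper's learned, variance-reducing sampler; the graph and budgets are hypothetical.

```python
import numpy as np

def layerwise_sample(adj_lists, top_nodes, sizes, rng):
    """Top-down layer-wise sampling for an L-layer GCN: layer l's node set is
    drawn once, conditioned on layer l+1, and shared by all parent nodes."""
    layers = [list(top_nodes)]
    for n_sample in sizes:                  # one fixed budget per layer
        candidates = sorted({v for u in layers[-1] for v in adj_lists[u]})
        # Uniform proposal standing in for the learned importance sampler.
        chosen = rng.choice(candidates, size=min(n_sample, len(candidates)),
                            replace=False)
        layers.append(list(chosen))
    return layers  # layers[0] = output nodes, layers[-1] = input-layer nodes

# Hypothetical 6-node graph given as adjacency lists.
adj = {0: [1, 2], 1: [0, 3], 2: [0, 4], 3: [1, 5], 4: [2], 5: [3]}
rng = np.random.default_rng(0)
print(layerwise_sample(adj, top_nodes=[0], sizes=[2, 3], rng=rng))
```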
"author_site": "Alexander Neitz, Giambattista Parascandolo, Stefan Bauer, Bernhard Sch\u00f6lkopf", "author": "Alexander Neitz; Giambattista Parascandolo; Stefan Bauer; Bernhard Sch\u00f6lkopf", "abstract": "Part of", "bibtex": "@inproceedings{NEURIPS2018_0f0ee331,\n author = {Neitz, Alexander and Parascandolo, Giambattista and Bauer, Stefan and Sch\\\"{o}lkopf, Bernhard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Skip Intervals: Temporal Abstraction for Recurrent Dynamical Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0f0ee3310223fe38a989b2c818709393-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0f0ee3310223fe38a989b2c818709393-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0f0ee3310223fe38a989b2c818709393-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0f0ee3310223fe38a989b2c818709393-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0f0ee3310223fe38a989b2c818709393-Reviews.html", "metareview": "", "pdf_size": 2065541, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7596677518342590575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Max Planck Institute for Intelligent Systems + Max Planck ETH Center for Learning Systems; Max Planck Institute for Intelligent Systems + Max Planck ETH Center for Learning Systems; Max Planck Institute for Intelligent Systems + Max Planck ETH Center for Learning Systems; Max Planck Institute for Intelligent Systems + Max Planck ETH Center for Learning Systems", "aff_domain": "tue.mpg.de; ; ; ", "email": "tue.mpg.de; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0f0ee3310223fe38a989b2c818709393-Abstract.html", "aff_unique_index": "0+1;0+1;0+1;0+1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max Planck ETH Center for Learning Systems", "aff_unique_dep": "Intelligent Systems;Center for Learning Systems", "aff_unique_url": "https://www.mpi-is.mpg.de;https://learning-systems.org", "aff_unique_abbr": "MPI-IS;", "aff_campus_unique_index": ";;;", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0+1;0+1;0+1", "aff_country_unique": "Germany;Switzerland" }, { "title": "Adding One Neuron Can Eliminate All Bad Local Minima", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11430", "id": "11430", "author_site": "SHIYU LIANG, Ruoyu Sun, Jason Lee, R. Srikant", "author": "SHIYU LIANG; Ruoyu Sun; Jason Lee; R. Srikant", "abstract": "One of the main difficulties in analyzing neural networks is the non-convexity of the loss function which may have many bad local minima. In this paper, we study the landscape of neural networks for binary classification tasks. Under mild assumptions, we prove that after adding one special neuron with a skip connection to the output, or one special neuron per layer, every local minimum is a global minimum.", "bibtex": "@inproceedings{NEURIPS2018_a0128693,\n author = {LIANG, SHIYU and Sun, Ruoyu and Lee, Jason D and Srikant, R.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adding One Neuron Can Eliminate All Bad Local Minima},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a012869311d64a44b5a0d567cd20de04-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a012869311d64a44b5a0d567cd20de04-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a012869311d64a44b5a0d567cd20de04-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a012869311d64a44b5a0d567cd20de04-Reviews.html", "metareview": "", "pdf_size": 412464, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=787179338990522533&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Coordinated Science Laboratory, Dept. of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign; Coordinated Science Laboratory, Department of ISE, University of Illinois at Urbana-Champaign; Marshall School of Business, University of Southern California; Coordinated Science Laboratory, Dept. of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;marshall.usc.edu;illinois.edu", "email": "illinois.edu;illinois.edu;marshall.usc.edu;illinois.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a012869311d64a44b5a0d567cd20de04-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Southern California", "aff_unique_dep": "Dept. of Electrical and Computer Engineering;Marshall School of Business", "aff_unique_url": "https://illinois.edu;https://www.usc.edu", "aff_unique_abbr": "UIUC;USC", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Urbana-Champaign;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Attacks on Stochastic Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11364", "id": "11364", "author_site": "Kwang-Sung Jun, Lihong Li, Yuzhe Ma, Jerry Zhu", "author": "Kwang-Sung Jun; Lihong Li; Yuzhe Ma; Xiaojin Zhu", "abstract": "We study adversarial attacks that manipulate the reward signals to control the actions chosen by a stochastic multi-armed bandit algorithm. We propose the first attack against two popular bandit algorithms: $\\epsilon$-greedy and UCB, \\emph{without} knowledge of the mean rewards. The attacker is able to spend only logarithmic effort, multiplied by a problem-specific parameter that becomes smaller as the bandit problem gets easier to attack. The result means the attacker can easily hijack the behavior of the bandit algorithm to promote or obstruct certain actions, say, a particular medical treatment. As bandits are seeing increasingly wide use in practice, our study exposes a significant security threat.", "bibtex": "@inproceedings{NEURIPS2018_85f007f8,\n author = {Jun, Kwang-Sung and Li, Lihong and Ma, Yuzhe and Zhu, Jerry},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
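For the "Adversarial Attacks on Stochastic Bandits" entry above, a toy version of reward poisoning against an epsilon-greedy learner: whenever a non-target arm is pulled, the attacker lowers that reward just enough that the arm's empirical mean stays below the target arm's. The margin, noise model, and learner details are simplified assumptions, not the paper's exact attack.

```python
import numpy as np

rng = np.random.default_rng(1)
K, T, target, eps_greedy = 3, 5000, 2, 0.1
true_means = np.array([0.9, 0.8, 0.2])          # target arm is actually worst
counts, sums = np.zeros(K), np.zeros(K)
attack_cost = 0.0                               # total reward perturbation spent

for t in range(T):
    if rng.random() < eps_greedy or counts.min() == 0:
        arm = int(rng.integers(K))              # explore
    else:
        arm = int(np.argmax(sums / counts))     # exploit empirical means
    reward = true_means[arm] + 0.1 * rng.normal()
    if arm != target and counts[target] > 0:
        # Poison so this arm's empirical mean drops below the target's mean
        # minus an arbitrary 0.05 margin.
        target_mean = sums[target] / counts[target]
        ceiling = (counts[arm] + 1) * (target_mean - 0.05) - sums[arm]
        if reward > ceiling:
            attack_cost += reward - ceiling
            reward = ceiling
    counts[arm] += 1
    sums[arm] += reward

print("target-arm pull rate:", counts[target] / T, "attack cost:", attack_cost)
```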
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Attacks on Stochastic Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/85f007f8c50dd25f5a45fca73cad64bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/85f007f8c50dd25f5a45fca73cad64bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/85f007f8c50dd25f5a45fca73cad64bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/85f007f8c50dd25f5a45fca73cad64bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/85f007f8c50dd25f5a45fca73cad64bd-Reviews.html", "metareview": "", "pdf_size": 676261, "gs_citation": 160, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6438898905984149945&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Boston University; Google Brain; UW-Madison; UW-Madison", "aff_domain": "gmail.com;google.com;wisc.edu;cs.wisc.edu", "email": "gmail.com;google.com;wisc.edu;cs.wisc.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/85f007f8c50dd25f5a45fca73cad64bd-Abstract.html", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Boston University;Google;University of Wisconsin-Madison", "aff_unique_dep": ";Google Brain;", "aff_unique_url": "https://www.bu.edu;https://brain.google.com;https://www.wisc.edu", "aff_unique_abbr": "BU;Google Brain;UW-Madison", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Mountain View;Madison", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Examples that Fool both Computer Vision and Time-Limited Humans", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11389", "id": "11389", "author_site": "Gamaleldin Elsayed, Shreya Shankar, Brian Cheung, Nicolas Papernot, Alexey Kurakin, Ian Goodfellow, Jascha Sohl-Dickstein", "author": "Gamaleldin Elsayed; Shreya Shankar; Brian Cheung; Nicolas Papernot; Alexey Kurakin; Ian Goodfellow; Jascha Sohl-Dickstein", "abstract": "Machine learning models are vulnerable to adversarial examples: small changes to images can cause computer vision models to make mistakes such as identifying a school bus as an ostrich. However, it is still an open question whether humans are prone to similar mistakes. Here, we address this question by leveraging recent techniques that transfer adversarial examples from computer vision models with known parameters and architecture to other models with unknown parameters and architecture, and by matching the initial processing of the human visual system. We find that adversarial examples that strongly transfer across computer vision models influence the classifications made by time-limited human observers.", "bibtex": "@inproceedings{NEURIPS2018_8562ae5e,\n author = {Elsayed, Gamaleldin and Shankar, Shreya and Cheung, Brian and Papernot, Nicolas and Kurakin, Alexey and Goodfellow, Ian and Sohl-Dickstein, Jascha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Examples that Fool both Computer Vision and Time-Limited Humans},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8562ae5e286544710b2e7ebe9858833b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8562ae5e286544710b2e7ebe9858833b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8562ae5e286544710b2e7ebe9858833b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8562ae5e286544710b2e7ebe9858833b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8562ae5e286544710b2e7ebe9858833b-Reviews.html", "metareview": "", "pdf_size": 6045180, "gs_citation": 327, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16132791080537741650&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Google Brain; Stanford University; UC Berkeley; Pennsylvania State University; Google Brain; Google Brain; Google Brain", "aff_domain": "gmail.com; ; ; ; ; ;google.com", "email": "gmail.com; ; ; ; ; ;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8562ae5e286544710b2e7ebe9858833b-Abstract.html", "aff_unique_index": "0;1;2;3;0;0;0", "aff_unique_norm": "Google;Stanford University;University of California, Berkeley;Pennsylvania State University", "aff_unique_dep": "Google Brain;;;", "aff_unique_url": "https://brain.google.com;https://www.stanford.edu;https://www.berkeley.edu;https://www.psu.edu", "aff_unique_abbr": "Google Brain;Stanford;UC Berkeley;PSU", "aff_campus_unique_index": "0;1;2;0;0;0", "aff_campus_unique": "Mountain View;Stanford;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Multiple Source Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11818", "id": "11818", "author_site": "Han Zhao, Shanghang Zhang, Guanhang Wu, Jos\u00e9 M. F. Moura, Joao P Costeira, Geoffrey Gordon", "author": "Han Zhao; Shanghang Zhang; Guanhang Wu; Jos\u00e9 M. F. Moura; Joao P. Costeira; Geoffrey J. Gordon", "abstract": "While domain adaptation has been actively researched, most algorithms focus on the single-source-single-target adaptation setting. In this paper we propose new generalization bounds and algorithms under both classification and regression settings for unsupervised multiple source domain adaptation. Our theoretical analysis naturally leads to an efficient learning strategy using adversarial neural networks: we show how to interpret it as learning feature representations that are invariant to the multiple domain shifts while still being discriminative for the learning task. To this end, we propose multisource domain adversarial networks (MDAN) that approach domain adaptation by optimizing task-adaptive generalization bounds. To demonstrate the effectiveness of MDAN, we conduct extensive experiments showing superior adaptation performance on both classification and regression problems: sentiment analysis, digit classification, and vehicle counting.", "bibtex": "@inproceedings{NEURIPS2018_717d8b3d,\n author = {Zhao, Han and Zhang, Shanghang and Wu, Guanhang and Moura, Jos\\'{e} M. F. and Costeira, Joao P and Gordon, Geoffrey J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
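For the MDAN entry above, a sketch of its soft aggregation of per-source costs via log-sum-exp (the hard-max variant is recovered as gamma grows). Treating `domain_losses` as additive confusion penalties, and the constants, are illustrative assumptions here; the sign convention depends on how the discriminators are parameterized.

```python
import numpy as np

def mdan_soft_objective(task_losses, domain_losses, gamma=1.0, mu=0.1):
    """Soft-max aggregation over source domains, in the spirit of MDAN:
    (1/gamma) * log(sum_i exp(gamma * (task_i + mu * domain_i))).
    Larger gamma weights the worst source domain more heavily."""
    costs = np.asarray(task_losses) + mu * np.asarray(domain_losses)
    return np.log(np.exp(gamma * costs).sum()) / gamma

# Hypothetical per-source losses for three source domains.
print(mdan_soft_objective([0.4, 0.7, 0.5], [0.6, 0.3, 0.5]))
```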
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Multiple Source Domain Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/717d8b3d60d9eea997b35b02b6a4e867-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/717d8b3d60d9eea997b35b02b6a4e867-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/717d8b3d60d9eea997b35b02b6a4e867-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/717d8b3d60d9eea997b35b02b6a4e867-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/717d8b3d60d9eea997b35b02b6a4e867-Reviews.html", "metareview": "", "pdf_size": 948243, "gs_citation": 688, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16270852836599078221&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Carnegie Mellon University; Carnegie Mellon University + IST, Universidade de Lisboa; Carnegie Mellon University; IST, Universidade de Lisboa; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;isr.ist.utl.pt;andrew.cmu.edu;andrew.cmu.edu", "email": "andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;isr.ist.utl.pt;andrew.cmu.edu;andrew.cmu.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/717d8b3d60d9eea997b35b02b6a4e867-Abstract.html", "aff_unique_index": "0;0+1;0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Instituto Superior T\u00e9cnico", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ist.utl.pt", "aff_unique_abbr": "CMU;IST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;0;1;0;0", "aff_country_unique": "United States;Portugal" }, { "title": "Adversarial Regularizers in Inverse Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11813", "id": "11813", "author_site": "Sebastian Lunz, Carola-Bibiane Sch\u00f6nlieb, Ozan \u00d6ktem", "author": "Sebastian Lunz; Ozan \u00d6ktem; Carola-Bibiane Sch\u00f6nlieb", "abstract": "Inverse Problems in medical imaging and computer vision are traditionally solved using purely model-based methods. Among those, variational regularization models are one of the most popular approaches. We propose a new framework for applying data-driven approaches to inverse problems, using a neural network as a regularization functional. The network learns to discriminate between the distribution of ground truth images and the distribution of unregularized reconstructions. Once trained, the network is applied to the inverse problem by solving the corresponding variational problem. Unlike other data-based approaches for inverse problems, the algorithm can be applied even if only unsupervised training data is available. Experiments demonstrate the potential of the framework for denoising on the BSDS dataset and for computed tomography reconstruction on the LIDC dataset.", "bibtex": "@inproceedings{NEURIPS2018_d903e960,\n author = {Lunz, Sebastian and \\\"{O}ktem, Ozan and Sch\\\"{o}nlieb, Carola-Bibiane},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
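For the "Adversarial Regularizers in Inverse Problems" entry above, a sketch of the reconstruction stage: once a critic-style regularizer Psi_theta is trained, reconstruction solves min_x ||Ax - y||^2 + lambda * Psi_theta(x) by gradient descent. The quadratic Psi below is a hypothetical stand-in for the trained network, and all constants are arbitrary.

```python
import numpy as np

def reconstruct(A, y, psi_grad, lam=0.1, lr=0.05, steps=500):
    """Variational reconstruction with a learned regularizer:
    minimize ||A x - y||^2 + lam * Psi_theta(x) by plain gradient descent,
    where psi_grad returns the gradient of the (trained) regularizer."""
    x = A.T @ y                               # simple initialization
    for _ in range(steps):
        grad = 2 * A.T @ (A @ x - y) + lam * psi_grad(x)
        x -= lr * grad
    return x

# Hypothetical stand-in for the trained critic: Psi(x) = ||x||^2, gradient 2x.
rng = np.random.default_rng(0)
A = rng.normal(size=(20, 50)) / np.sqrt(20)   # underdetermined forward operator
x_true = rng.normal(size=50)
y = A @ x_true + 0.01 * rng.normal(size=20)
print(np.linalg.norm(A @ reconstruct(A, y, lambda x: 2 * x) - y))  # small residual
```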
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Regularizers in Inverse Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d903e9608cfbf08910611e4346a0ba44-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d903e9608cfbf08910611e4346a0ba44-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d903e9608cfbf08910611e4346a0ba44-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d903e9608cfbf08910611e4346a0ba44-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d903e9608cfbf08910611e4346a0ba44-Reviews.html", "metareview": "", "pdf_size": 504167, "gs_citation": 298, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3594915696133260277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "DAMTP, University of Cambridge; Department of Mathematics, KTH - Royal Institute of Technology; DAMTP, University of Cambridge", "aff_domain": "math.cam.ac.uk;kth.se;cam.ac.uk", "email": "math.cam.ac.uk;kth.se;cam.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d903e9608cfbf08910611e4346a0ba44-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;KTH - Royal Institute of Technology", "aff_unique_dep": "Department of Applied Mathematics and Theoretical Physics;Department of Mathematics", "aff_unique_url": "https://www.cam.ac.uk;https://www.kth.se", "aff_unique_abbr": "Cambridge;KTH", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Sweden" }, { "title": "Adversarial Risk and Robustness: General Definitions and Implications for the Uniform Distribution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11980", "id": "11980", "author_site": "Dimitrios Diochnos, Saeed Mahloujifar, Mohammad Mahmoody", "author": "Dimitrios Diochnos; Saeed Mahloujifar; Mohammad Mahmoody", "abstract": "We study adversarial perturbations when the instances are uniformly distributed over {0,1}^n. We study both \"inherent\" bounds that apply to any problem and any classifier for such a problem as well as bounds that apply to specific problems and specific hypothesis classes.", "bibtex": "@inproceedings{NEURIPS2018_3483e5ec,\n author = {Diochnos, Dimitrios and Mahloujifar, Saeed and Mahmoody, Mohammad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Risk and Robustness: General Definitions and Implications for the Uniform Distribution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3483e5ec0489e5c394b028ec4e81f3e1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3483e5ec0489e5c394b028ec4e81f3e1-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3483e5ec0489e5c394b028ec4e81f3e1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3483e5ec0489e5c394b028ec4e81f3e1-Reviews.html", "metareview": "", "pdf_size": 195247, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17941284905371613599&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of Virginia; University of Virginia; University of Virginia", "aff_domain": "virginia.edu;virginia.edu;virginia.edu", "email": "virginia.edu;virginia.edu;virginia.edu", "github": "", "project": "http://rodrigob.github.io/are_we_there_yet/build/", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3483e5ec0489e5c394b028ec4e81f3e1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarial Scene Editing: Automatic Object Removal from Weak Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11740", "id": "11740", "author_site": "Rakshith R Shetty, Mario Fritz, Bernt Schiele", "author": "Rakshith R Shetty; Mario Fritz; Bernt Schiele", "abstract": "While great progress has been made recently in automatic image manipulation, it has been limited to object centric images like faces or structured scene datasets.\nIn this work, we take a step towards general scene-level image editing by developing an automatic interaction-free object removal model. Our model learns to find and remove objects from general scene images using image-level labels and unpaired data in a generative adversarial network (GAN) framework. We achieve this with two key contributions: a two-stage editor architecture consisting of a mask generator and image in-painter that co-operate to remove objects, and a novel GAN based prior for the mask generator that allows us to flexibly incorporate knowledge about object shapes. We experimentally show on two datasets that our method effectively removes a wide variety of objects using weak supervision only.", "bibtex": "@inproceedings{NEURIPS2018_c911241d,\n author = {Shetty, Rakshith R and Fritz, Mario and Schiele, Bernt},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Scene Editing: Automatic Object Removal from Weak Supervision},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c911241d00294e8bb714eee2e83fa475-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c911241d00294e8bb714eee2e83fa475-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c911241d00294e8bb714eee2e83fa475-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c911241d00294e8bb714eee2e83fa475-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c911241d00294e8bb714eee2e83fa475-Reviews.html", "metareview": "", "pdf_size": 2161375, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16221838653575696987&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Max Planck Institute for Informatics, Saarland Informatics Campus; CISPA Helmholtz Center i.G., Saarland Informatics Campus; Max Planck Institute for Informatics, Saarland Informatics Campus", "aff_domain": "mpi-inf.mpg.de;cispa.saarland;mpi-inf.mpg.de", "email": "mpi-inf.mpg.de;cispa.saarland;mpi-inf.mpg.de", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c911241d00294e8bb714eee2e83fa475-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Max Planck Institute for Informatics;CISPA Helmholtz Center", "aff_unique_dep": ";", "aff_unique_url": "https://mpi-inf.mpg.de;https://www.cispa.de", "aff_unique_abbr": "MPII;CISPA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Saarland", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Adversarial Text Generation via Feature-Mover's Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11459", "id": "11459", "author_site": "Liqun Chen, Shuyang Dai, Chenyang Tao, Haichao Zhang, Zhe Gan, Dinghan Shen, Yizhe Zhang, Guoyin Wang, Dinghan Shen, Lawrence Carin", "author": "Liqun Chen; Shuyang Dai; Chenyang Tao; Haichao Zhang; Zhe Gan; Dinghan Shen; Yizhe Zhang; Guoyin Wang; Ruiyi Zhang; Lawrence Carin", "abstract": "Generative adversarial networks (GANs) have achieved significant success in generating real-valued data. However, the discrete nature of text hinders the application of GAN to text-generation tasks. Instead of using the standard GAN objective, we propose to improve text-generation GAN via a novel approach inspired by optimal transport. Specifically, we consider matching the latent feature distributions of real and synthetic sentences using a novel metric, termed the feature-mover's distance (FMD). This formulation leads to a highly discriminative critic and easy-to-optimize objective, overcoming the mode-collapsing and brittle-training problems in existing methods. Extensive experiments are conducted on a variety of tasks to evaluate the proposed model empirically, including unconditional text generation, style transfer from non-parallel text, and unsupervised cipher cracking. 
The proposed model yields superior performance, demonstrating wide applicability and effectiveness.", "bibtex": "@inproceedings{NEURIPS2018_074177d3,\n author = {Chen, Liqun and Dai, Shuyang and Tao, Chenyang and Zhang, Haichao and Gan, Zhe and Shen, Dinghan and Zhang, Yizhe and Wang, Guoyin and Zhang, Ruiyi and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Text Generation via Feature-Mover\\textquotesingle s Distance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/074177d3eb6371e32c16c55a3b8f706b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/074177d3eb6371e32c16c55a3b8f706b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/074177d3eb6371e32c16c55a3b8f706b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/074177d3eb6371e32c16c55a3b8f706b-Reviews.html", "metareview": "", "pdf_size": 842297, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16917846960576555740&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;;;;;;;", "aff_domain": ";;;;;;;;;", "email": ";;;;;;;;;", "github": "", "project": "", "author_num": 10, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/074177d3eb6371e32c16c55a3b8f706b-Abstract.html" }, { "title": "Adversarial vulnerability for any classifier", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11136", "id": "11136", "author_site": "Alhussein Fawzi, Hamza Fawzi, Omar Fawzi", "author": "Alhussein Fawzi; Hamza Fawzi; Omar Fawzi", "abstract": "Despite achieving impressive performance, state-of-the-art classifiers remain highly vulnerable to small, imperceptible, adversarial perturbations. This vulnerability has proven empirically to be very intricate to address. In this paper, we study the phenomenon of adversarial perturbations under the assumption that the data is generated with a smooth generative model. We derive fundamental upper bounds on the robustness to perturbations of any classification function, and prove the existence of adversarial perturbations that transfer well across different classifiers with small risk. Our analysis of the robustness also provides insights into key properties of generative models, such as their smoothness and dimensionality of latent space. We conclude with numerical experimental results showing that our bounds provide informative baselines for the maximal achievable robustness on several datasets.", "bibtex": "@inproceedings{NEURIPS2018_851ddf50,\n author = {Fawzi, Alhussein and Fawzi, Hamza and Fawzi, Omar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
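For the feature-mover's distance entry above, an optimal-transport cost between two clouds of latent features can be approximated generically with entropic OT (Sinkhorn iterations). This is a stand-in sketch, not the paper's exact solver; the cosine cost, feature sizes, and regularization strength are arbitrary choices.

```python
import numpy as np

def sinkhorn_distance(F_real, F_fake, reg=0.1, iters=200):
    """Entropic OT between two feature clouds with uniform weights:
    a generic approximation of a feature-mover's-distance-style cost."""
    n, m = len(F_real), len(F_fake)
    Fr = F_real / np.linalg.norm(F_real, axis=1, keepdims=True)
    Ff = F_fake / np.linalg.norm(F_fake, axis=1, keepdims=True)
    C = 1.0 - Fr @ Ff.T                      # cosine transport cost matrix
    K = np.exp(-C / reg)
    a, b = np.full(n, 1.0 / n), np.full(m, 1.0 / m)
    v = np.ones(m)
    for _ in range(iters):                   # Sinkhorn fixed-point iterations
        u = a / (K @ v)
        v = b / (K.T @ u)
    P = u[:, None] * K * v[None, :]          # (approximately) optimal plan
    return (P * C).sum()

rng = np.random.default_rng(0)
real = rng.normal(size=(64, 32))             # hypothetical sentence features
fake = rng.normal(loc=0.5, size=(64, 32))
print(sinkhorn_distance(real, fake))
```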
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial vulnerability for any classifier},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/851ddf5058cf22df63d3344ad89919cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/851ddf5058cf22df63d3344ad89919cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/851ddf5058cf22df63d3344ad89919cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/851ddf5058cf22df63d3344ad89919cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/851ddf5058cf22df63d3344ad89919cf-Reviews.html", "metareview": "", "pdf_size": 706725, "gs_citation": 307, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4855982336481254184&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "DeepMind; Department of Applied Mathematics & Theoretical Physics, University of Cambridge; ENS de Lyon*", "aff_domain": "google.com;damtp.cam.ac.uk;ens-lyon.fr", "email": "google.com;damtp.cam.ac.uk;ens-lyon.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/851ddf5058cf22df63d3344ad89919cf-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "DeepMind;University of Cambridge;\u00c9cole Normale Sup\u00e9rieure de Lyon", "aff_unique_dep": ";Department of Applied Mathematics & Theoretical Physics;", "aff_unique_url": "https://deepmind.com;https://www.cam.ac.uk;https://www.ens-lyon.fr", "aff_unique_abbr": "DeepMind;Cambridge;ENS de Lyon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;France" }, { "title": "Adversarially Robust Generalization Requires More Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11491", "id": "11491", "author_site": "Ludwig Schmidt, Shibani Santurkar, Dimitris Tsipras, Kunal Talwar, Aleksander Madry", "author": "Ludwig Schmidt; Shibani Santurkar; Dimitris Tsipras; Kunal Talwar; Aleksander Madry", "abstract": "Machine learning models are often susceptible to adversarial perturbations of their inputs. Even small perturbations can cause state-of-the-art classifiers with high \"standard\" accuracy to produce an incorrect prediction with high confidence. To better understand this phenomenon, we study adversarially robust learning from the viewpoint of generalization. We show that already in a simple natural data model, the sample complexity of robust learning can be significantly larger than that of \"standard\" learning. This gap is information theoretic and holds irrespective of the training algorithm or the model family. We complement our theoretical results with experiments on popular image classification datasets and show that a similar gap exists here as well. We postulate that the difficulty of training robust classifiers stems, at least partially, from this inherently larger sample complexity.", "bibtex": "@inproceedings{NEURIPS2018_f708f064,\n author = {Schmidt, Ludwig and Santurkar, Shibani and Tsipras, Dimitris and Talwar, Kunal and Madry, Aleksander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarially Robust Generalization Requires More Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f708f064faaf32a43e4d3c784e6af9ea-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f708f064faaf32a43e4d3c784e6af9ea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f708f064faaf32a43e4d3c784e6af9ea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f708f064faaf32a43e4d3c784e6af9ea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f708f064faaf32a43e4d3c784e6af9ea-Reviews.html", "metareview": "", "pdf_size": 434218, "gs_citation": 962, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11617408739335906297&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "UC Berkeley; MIT; MIT; Google Brain; MIT", "aff_domain": "berkeley.edu;mit.edu;mit.edu;google.com;mit.edu", "email": "berkeley.edu;mit.edu;mit.edu;google.com;mit.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f708f064faaf32a43e4d3c784e6af9ea-Abstract.html", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;Google", "aff_unique_dep": ";;Google Brain", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://brain.google.com", "aff_unique_abbr": "UC Berkeley;MIT;Google Brain", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adversarially Robust Optimization with Gaussian Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11561", "id": "11561", "author_site": "Ilija Bogunovic, Jonathan Scarlett, Stefanie Jegelka, Volkan Cevher", "author": "Ilija Bogunovic; Jonathan Scarlett; Stefanie Jegelka; Volkan Cevher", "abstract": "In this paper, we consider the problem of Gaussian process (GP) optimization with an added robustness requirement: The returned point may be perturbed by an adversary, and we require the function value to remain as high as possible even after this perturbation. This problem is motivated by settings in which the underlying functions during optimization and implementation stages are different, or when one is interested in finding an entire region of good inputs rather than only a single point. We show that standard GP optimization algorithms do not exhibit the desired robustness properties, and provide a novel confidence-bound based algorithm StableOpt for this purpose. We rigorously establish the required number of samples for StableOpt to find a near-optimal point, and we complement this guarantee with an algorithm-independent lower bound. We experimentally demonstrate several potential applications of interest using real-world data sets, and we show that StableOpt consistently succeeds in finding a stable maximizer where several baseline methods fail.", "bibtex": "@inproceedings{NEURIPS2018_60243f9b,\n author = {Bogunovic, Ilija and Scarlett, Jonathan and Jegelka, Stefanie and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
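For the StableOpt entry above, a grid-based sketch of the robust selection rule max_x min_{|delta| <= eps} ucb(x + delta), using a bare-bones RBF Gaussian process. The hyperparameters, the 1-D toy objective, and the omission of the paper's LCB-based reporting rule are simplifying assumptions.

```python
import numpy as np

def gp_posterior(Xtr, ytr, Xte, ls=0.2, sigma=0.05):
    """Minimal RBF-kernel GP posterior mean and standard deviation (1-D)."""
    def k(A, B):
        d = A[:, None] - B[None, :]
        return np.exp(-0.5 * (d / ls) ** 2)
    Ktr = k(Xtr, Xtr) + sigma ** 2 * np.eye(len(Xtr))
    Kte = k(Xte, Xtr)
    mu = Kte @ np.linalg.solve(Ktr, ytr)
    var = 1.0 - np.einsum('ij,ji->i', Kte, np.linalg.solve(Ktr, Kte.T))
    return mu, np.sqrt(np.maximum(var, 1e-12))

def stableopt_pick(Xgrid, Xtr, ytr, eps=0.1, beta=2.0):
    """Pick the x maximizing the worst-case UCB over the eps-ball around x."""
    mu, sd = gp_posterior(Xtr, ytr, Xgrid)
    ucb = mu + beta * sd
    worst = [ucb[np.abs(Xgrid - x) <= eps].min() for x in Xgrid]
    return Xgrid[int(np.argmax(worst))]

f = lambda x: np.sin(3 * x) - 0.5 * x        # hypothetical objective on [0, 2]
rng = np.random.default_rng(0)
Xtr = rng.uniform(0, 2, size=5)
ytr = f(Xtr) + 0.05 * rng.normal(size=5)
Xgrid = np.linspace(0, 2, 201)
for _ in range(10):                           # a few StableOpt-style queries
    x = stableopt_pick(Xgrid, Xtr, ytr)
    Xtr = np.append(Xtr, x)
    ytr = np.append(ytr, f(x) + 0.05 * rng.normal())
print("robust choice near:", stableopt_pick(Xgrid, Xtr, ytr))
```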
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarially Robust Optimization with Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/60243f9b1ac2dba11ff8131c8f4431e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/60243f9b1ac2dba11ff8131c8f4431e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/60243f9b1ac2dba11ff8131c8f4431e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/60243f9b1ac2dba11ff8131c8f4431e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/60243f9b1ac2dba11ff8131c8f4431e0-Reviews.html", "metareview": "", "pdf_size": 3272241, "gs_citation": 168, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12217282696558505452&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "LIONS, EPFL; National University of Singapore; MIT CSAIL; LIONS, EPFL", "aff_domain": "epfl.ch;comp.nus.edu.sg;mit.edu;epfl.ch", "email": "epfl.ch;comp.nus.edu.sg;mit.edu;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/60243f9b1ac2dba11ff8131c8f4431e0-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "EPFL;National University of Singapore;Massachusetts Institute of Technology", "aff_unique_dep": "LIONS;;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.epfl.ch;https://www.nus.edu.sg;https://www.csail.mit.edu", "aff_unique_abbr": "EPFL;NUS;MIT CSAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Switzerland;Singapore;United States" }, { "title": "Algebraic tests of general Gaussian latent tree models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11610", "id": "11610", "author_site": "Dennis Leung, Mathias Drton", "author": "Dennis Leung; Mathias Drton", "abstract": "We consider general Gaussian latent tree models in which the observed variables are not restricted to be leaves of the tree. Extending related recent work, we give a full semi-algebraic description of the set of covariance matrices of any such model. In other words, we find polynomial constraints that characterize when a matrix is the covariance matrix of a distribution in a given latent tree model. However, leveraging these constraints to test a given such model is often complicated by the number of constraints being large and by singularities of individual polynomials, which may invalidate standard approximations to relevant probability distributions. Illustrating with the star tree, we propose a new testing methodology that circumvents singularity issues by trading off some statistical estimation efficiency and handles cases with many constraints through recent advances in Gaussian approximation for maxima of sums of high-dimensional random vectors. Our test avoids the need to maximize the possibly multimodal likelihood function of such models and is applicable to models with a larger number of variables. These points are illustrated in numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_bbb001ba,\n author = {Leung, Dennis and Drton, Mathias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
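For the "Adversarially Robust Optimization with Gaussian Processes" entry above, the core of StableOpt is a max-min acquisition rule: pick the point whose worst upper confidence bound over the perturbation set is best. A minimal 1-D grid sketch, assuming a precomputed GP posterior (`mu`, `sd`) and a perturbation radius `eps` measured in grid indices; the function name, `beta`, and the toy posterior are illustrative, not from the paper's code:

```python
import numpy as np

def stableopt_select(mu, sd, eps, beta=2.0):
    # Worst-case upper confidence bound: minimize the UCB over an eps-ball
    # (in grid indices) around each candidate, then pick the best candidate.
    n = len(mu)
    ucb = mu + beta * sd
    worst = np.array([ucb[max(0, i - eps):min(n, i + eps + 1)].min()
                      for i in range(n)])
    return int(worst.argmax())

# Toy posterior: a narrow spike (unstable optimum) and a broad bump (stable one).
x = np.linspace(0.0, 1.0, 201)
mu = np.exp(-((x - 0.2) / 0.01) ** 2) + 0.8 * np.exp(-((x - 0.7) / 0.15) ** 2)
sd = 0.05 * np.ones_like(x)
print(x[stableopt_select(mu, sd, eps=10)])   # lands near the broad bump at 0.7
```

On this toy posterior the rule passes over the tall-but-narrow spike and returns a point near the broad bump, which is the "stable maximizer" behaviour the abstract describes.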
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algebraic tests of general Gaussian latent tree models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bbb001ba009ed11717eaec9305b2feb6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bbb001ba009ed11717eaec9305b2feb6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/bbb001ba009ed11717eaec9305b2feb6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bbb001ba009ed11717eaec9305b2feb6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bbb001ba009ed11717eaec9305b2feb6-Reviews.html", "metareview": "", "pdf_size": 338660, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8311840141085023413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Data Sciences and Operations, University of Southern California; Department of Statistics, University of Washington + Department of Mathematical Sciences, University of Copenhagen", "aff_domain": "uw.edu;uw.edu", "email": "uw.edu;uw.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bbb001ba009ed11717eaec9305b2feb6-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "University of Southern California;University of Washington;University of Copenhagen", "aff_unique_dep": "Department of Data Sciences and Operations;Department of Statistics;Department of Mathematical Sciences", "aff_unique_url": "https://www.usc.edu;https://www.washington.edu;https://www.ku.dk", "aff_unique_abbr": "USC;UW;UCPH", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Los Angeles;Seattle;", "aff_country_unique_index": "0;0+1", "aff_country_unique": "United States;Denmark" }, { "title": "Algorithmic Assurance: An Active Approach to Algorithmic Testing using Bayesian Optimisation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11534", "id": "11534", "author_site": "Shivapratap Gopakumar, Sunil Gupta, Santu Rana, Vu Nguyen, Svetha Venkatesh", "author": "Shivapratap Gopakumar; Sunil Gupta; Santu Rana; Vu Nguyen; Svetha Venkatesh", "abstract": "We introduce algorithmic assurance, the problem of testing whether\nmachine learning algorithms are conforming to their intended design\ngoal. We address this problem by proposing an efficient framework\nfor algorithmic testing. To provide assurance, we need to efficiently\ndiscover scenarios where an algorithm decision deviates maximally\nfrom its intended gold standard. We mathematically formulate this\ntask as an optimisation problem of an expensive, black-box function.\nWe use an active learning approach based on Bayesian optimisation\nto solve this optimisation problem. We extend this framework to algorithms\nwith vector-valued outputs by making appropriate modification in Bayesian\noptimisation via the EXP3 algorithm. We theoretically analyse our\nmethods for convergence. Using two real-world applications, we demonstrate\nthe efficiency of our methods. 
The significance of our problem formulation\nand initial solutions is that it will serve as the foundation for assuring\nhumans about machines making complex decisions.", "bibtex": "@inproceedings{NEURIPS2018_cc709032,\n author = {Gopakumar, Shivapratap and Gupta, Sunil and Rana, Santu and Nguyen, Vu and Venkatesh, Svetha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algorithmic Assurance: An Active Approach to Algorithmic Testing using Bayesian Optimisation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cc70903297fe1e25537ae50aea186306-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cc70903297fe1e25537ae50aea186306-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cc70903297fe1e25537ae50aea186306-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cc70903297fe1e25537ae50aea186306-Reviews.html", "metareview": "", "pdf_size": 395505, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6517267723562437007&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Centre for Pattern Recognition and Data Analytics, Deakin University, Geelong, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Geelong, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Geelong, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Geelong, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Geelong, Australia", "aff_domain": "deakin.edu.au; ; ; ; ", "email": "deakin.edu.au; ; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cc70903297fe1e25537ae50aea186306-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Deakin University", "aff_unique_dep": "Centre for Pattern Recognition and Data Analytics", "aff_unique_url": "https://www.deakin.edu.au", "aff_unique_abbr": "", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Geelong", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Algorithmic Linearly Constrained Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11225", "id": "11225", "author": "Markus Lange-Hegermann", "abstract": "We algorithmically construct multi-output Gaussian process priors which satisfy linear differential equations. Our approach attempts to parametrize all solutions of the equations using Gr\u00f6bner bases. If successful, a pushforward Gaussian process along the parametrization is the desired prior. We consider several examples from physics, geomathematics and control, among them the full inhomogeneous system of Maxwell's equations. By bringing together stochastic learning and computer algebra in a novel way, we combine noisy observations with precise algebraic computations.", "bibtex": "@inproceedings{NEURIPS2018_68b1fbe7,\n author = {Lange-Hegermann, Markus},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
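For the "Algorithmic Assurance" entry above, the quantity being optimised is the maximal deviation between an algorithm's decision and its gold standard. The sketch below states that objective directly; plain random search stands in for the paper's Bayesian optimisation so the example stays self-contained, and every name in it is illustrative:

```python
import numpy as np

def max_deviation_search(alg, gold, sample_x, n_queries=200, seed=0):
    # Find the scenario x where the algorithm deviates most from the gold
    # standard.  The paper optimises this expensive black-box objective with
    # Bayesian optimisation; random search stands in here for brevity.
    rng = np.random.default_rng(seed)
    best_x, best_gap = None, -np.inf
    for _ in range(n_queries):
        x = sample_x(rng)
        gap = float(np.linalg.norm(alg(x) - gold(x)))   # deviation at x
        if gap > best_gap:
            best_x, best_gap = x, gap
    return best_x, best_gap

# Toy usage: a hypothetical model that degrades far from the origin.
gold = lambda x: np.sin(3 * x)
alg = lambda x: np.sin(3 * x) * np.exp(-abs(x))
print(max_deviation_search(alg, gold, lambda rng: rng.uniform(-3.0, 3.0)))
```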
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algorithmic Linearly Constrained Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/68b1fbe7f16e4ae3024973f12f3cb313-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/68b1fbe7f16e4ae3024973f12f3cb313-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/68b1fbe7f16e4ae3024973f12f3cb313-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/68b1fbe7f16e4ae3024973f12f3cb313-Reviews.html", "metareview": "", "pdf_size": 558454, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16921480062292483246&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering and Computer Science, Ostwestfalen-Lippe University of Applied Sciences, Lemgo", "aff_domain": "hs-owl.de", "email": "hs-owl.de", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/68b1fbe7f16e4ae3024973f12f3cb313-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Ostwestfalen-Lippe University of Applied Sciences", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.th-owl.de", "aff_unique_abbr": "", "aff_campus_unique_index": "0", "aff_campus_unique": "Lemgo", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "title": "Algorithmic Regularization in Learning Deep Homogeneous Models: Layers are Automatically Balanced", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11063", "id": "11063", "author_site": "Simon Du, Wei Hu, Jason Lee", "author": "Simon S Du; Wei Hu; Jason Lee", "abstract": "We study the implicit regularization imposed by gradient descent for learning multi-layer homogeneous functions including feed-forward fully connected and convolutional deep neural networks with linear, ReLU or Leaky ReLU activation. We rigorously prove that gradient flow (i.e. gradient descent with infinitesimal step size) effectively enforces the differences between squared norms across different layers to remain invariant without any explicit regularization. This result implies that if the weights are initially small, gradient flow automatically balances the magnitudes of all layers. Using a discretization argument, we analyze gradient descent with positive step size for the non-convex low-rank asymmetric matrix factorization problem without any regularization. Inspired by our findings for gradient flow, we prove that gradient descent with step sizes $\\eta_t=O(t^{\u2212(1/2+\\delta)}) (0<\\delta\\le1/2)$ automatically balances two low-rank factors and converges to a bounded global optimum. Furthermore, for rank-1 asymmetric matrix factorization we give a finer analysis showing gradient descent with constant step size converges to the global minimum at a globally linear rate. We believe that the idea of examining the invariance imposed by first order algorithms in learning homogeneous models could serve as a fundamental building block for studying optimization for learning deep models.", "bibtex": "@inproceedings{NEURIPS2018_fe131d7f,\n author = {Du, Simon S and Hu, Wei and Lee, Jason D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
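The construction in the "Algorithmic Linearly Constrained Gaussian Processes" entry above compresses to one line. This is a hedged paraphrase: $A$ is the linear operator defining the constraints and $B$ the parametrization produced by the Gröbner-basis computation, so $AB = 0$:

```latex
% If the Groebner-basis computation yields a parametrization f = B g of all
% solutions of A f = 0, pushing a GP prior on g forward through B gives a
% prior whose samples satisfy the constraints by construction:
\[
  g \sim \mathcal{GP}\bigl(0,\ k(x,x')\bigr)
  \;\Longrightarrow\;
  f = Bg \sim \mathcal{GP}\bigl(0,\ B_x\, k(x,x')\, B_{x'}^{\top}\bigr),
  \qquad A f = (AB)\,g = 0 .
\]
```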
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algorithmic Regularization in Learning Deep Homogeneous Models: Layers are Automatically Balanced},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fe131d7f5a6b38b23cc967316c13dae2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fe131d7f5a6b38b23cc967316c13dae2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fe131d7f5a6b38b23cc967316c13dae2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fe131d7f5a6b38b23cc967316c13dae2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fe131d7f5a6b38b23cc967316c13dae2-Reviews.html", "metareview": "", "pdf_size": 1193528, "gs_citation": 269, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14501985540297080108&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Machine Learning Department, School of Computer Science, Carnegie Mellon University;Computer Science Department, Princeton University;Department of Data Sciences and Operations, Marshall School of Business, University of Southern California", "aff_domain": "cs.cmu.edu;cs.princeton.edu;marshall.usc.edu", "email": "cs.cmu.edu;cs.princeton.edu;marshall.usc.edu", "github": "", "project": "https://arxiv.org/abs/1806.00900", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fe131d7f5a6b38b23cc967316c13dae2-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Carnegie Mellon University;Princeton University;University of Southern California", "aff_unique_dep": "Machine Learning Department;Computer Science Department;Department of Data Sciences and Operations", "aff_unique_url": "https://www.cmu.edu;https://www.princeton.edu;https://www.usc.edu", "aff_unique_abbr": "CMU;Princeton;USC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Princeton;Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Algorithms and Theory for Multiple-Source Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11789", "id": "11789", "author_site": "Judy Hoffman, Mehryar Mohri, Ningshan Zhang", "author": "Judy Hoffman; Mehryar Mohri; Ningshan Zhang", "abstract": "We present a number of novel contributions to the multiple-source adaptation problem. We derive new normalized solutions with strong theoretical guarantees for the cross-entropy loss and other similar losses. We also provide new guarantees that hold in the case where the conditional probabilities for the source domains are distinct. Moreover, we give new algorithms for determining the distribution-weighted combination solution for the cross-entropy loss and other losses. We report the results of a series of experiments with real-world datasets. We find that our algorithm outperforms competing approaches by producing a single robust model that performs well on any target mixture distribution. Altogether, our theory, algorithms, and empirical results provide a full solution for the multiple-source adaptation problem with very practical benefits.", "bibtex": "@inproceedings{NEURIPS2018_2e2079d6,\n author = {Hoffman, Judy and Mohri, Mehryar and Zhang, Ningshan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
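The invariance claimed in the "Algorithmic Regularization in Learning Deep Homogeneous Models" entry above can be checked numerically in a few lines. A minimal sketch for a two-layer scalar linear model, where small-step gradient descent stands in for gradient flow; the data point, initialization, and step size are arbitrary toy choices:

```python
# Toy check of the balancedness invariant: for f(x) = w2 * w1 * x trained on
# squared loss, gradient flow keeps w1^2 - w2^2 exactly constant; small-step
# gradient descent keeps it approximately constant.
x, y = 1.0, 3.0
w1, w2 = 2.0, 0.1          # deliberately unbalanced initialization
eta = 1e-4                 # small step size approximates gradient flow
for _ in range(200_000):
    r = w2 * w1 * x - y    # residual of the loss 0.5 * r**2
    w1, w2 = w1 - eta * r * w2 * x, w2 - eta * r * w1 * x
print(w1 ** 2 - w2 ** 2)   # stays close to its initial value 3.99
```

The cancellation is exact in continuous time: the gradient in each weight is the residual times the other weight, so the two terms in d(w1^2 - w2^2)/dt cancel.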
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algorithms and Theory for Multiple-Source Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2e2079d63348233d91cad1fa9b1361e9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2e2079d63348233d91cad1fa9b1361e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2e2079d63348233d91cad1fa9b1361e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2e2079d63348233d91cad1fa9b1361e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2e2079d63348233d91cad1fa9b1361e9-Reviews.html", "metareview": "", "pdf_size": 987394, "gs_citation": 275, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11631836443659631127&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "CS Department UC Berkeley; Courant Institute and Google; New York University", "aff_domain": "eecs.berkeley.edu;cims.nyu.edu;stern.nyu.edu", "email": "eecs.berkeley.edu;cims.nyu.edu;stern.nyu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2e2079d63348233d91cad1fa9b1361e9-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Courant Institute of Mathematical Sciences;New York University", "aff_unique_dep": "Computer Science Department;Mathematical Sciences;", "aff_unique_url": "https://www.berkeley.edu;https://courant.nyu.edu;https://www.nyu.edu", "aff_unique_abbr": "UC Berkeley;Courant;NYU", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Almost Optimal Algorithms for Linear Stochastic Bandits with Heavy-Tailed Payoffs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11805", "id": "11805", "author_site": "Han Shao, Xiaotian Yu, Irwin King, Michael R Lyu", "author": "Han Shao; Xiaotian Yu; Irwin King; Michael R Lyu", "abstract": "In linear stochastic bandits, it is commonly assumed that payoffs are corrupted by sub-Gaussian noise. In this paper, under a weaker assumption on the noise, we study the problem of \\underline{lin}ear stochastic {\\underline b}andits with h{\\underline e}avy-{\\underline t}ailed payoffs (LinBET), where the distributions have finite moments of order $1+\\epsilon$, for some $\\epsilon\\in (0,1]$. We rigorously analyze the regret lower bound of LinBET as $\\Omega(T^{\\frac{1}{1+\\epsilon}})$, implying that finite moments of order 2 (i.e., finite variances) yield the bound of $\\Omega(\\sqrt{T})$, with $T$ being the total number of rounds to play bandits. The provided lower bound also indicates that the state-of-the-art algorithms for LinBET are far from optimal. By adopting median of means with a well-designed allocation of decisions and truncation based on historical information, we develop two novel bandit algorithms, where the regret upper bounds match the lower bound up to polylogarithmic factors. To the best of our knowledge, we are the first to solve LinBET optimally in the sense of the polynomial order on $T$. 
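For the "Algorithms and Theory for Multiple-Source Adaptation" entry above, the object being learned is a distribution-weighted combination of source predictors. A minimal sketch of that combination rule, assuming per-source density estimates are available; all argument names are illustrative:

```python
import numpy as np

def distribution_weighted(predictors, densities, lam, x, eps=1e-12):
    # h(x) = sum_k lam_k D_k(x) h_k(x) / sum_k lam_k D_k(x): sources whose
    # density is high at x dominate the combined prediction.
    w = np.array([l * d(x) for l, d in zip(lam, densities)])
    preds = np.array([h(x) for h in predictors])
    return float(w @ preds / (w.sum() + eps))

# Toy usage: two sources covering different regions of the input space.
densities = [lambda x: np.exp(-x ** 2), lambda x: np.exp(-(x - 3.0) ** 2)]
predictors = [lambda x: 1.0, lambda x: -1.0]
print(distribution_weighted(predictors, densities, [0.5, 0.5], 2.9))  # near -1
```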
Our proposed algorithms are evaluated based on synthetic datasets, and outperform the state-of-the-art results.", "bibtex": "@inproceedings{NEURIPS2018_173f0f6b,\n author = {Shao, Han and Yu, Xiaotian and King, Irwin and Lyu, Michael R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Almost Optimal Algorithms for Linear Stochastic Bandits with Heavy-Tailed Payoffs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/173f0f6bb0ee97cf5098f73ee94029d4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/173f0f6bb0ee97cf5098f73ee94029d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/173f0f6bb0ee97cf5098f73ee94029d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/173f0f6bb0ee97cf5098f73ee94029d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/173f0f6bb0ee97cf5098f73ee94029d4-Reviews.html", "metareview": "", "pdf_size": 557777, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3841503369235777621&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science and Engineering, The Chinese University of Hong Kong; Department of Computer Science and Engineering, The Chinese University of Hong Kong; Department of Computer Science and Engineering, The Chinese University of Hong Kong; Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/173f0f6bb0ee97cf5098f73ee94029d4-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Alternating optimization of decision trees, with application to learning sparse oblique trees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11139", "id": "11139", "author_site": "Miguel A. Carreira-Perpinan, Pooya Tavallali", "author": "Miguel A. Carreira-Perpinan; Pooya Tavallali", "abstract": "Learning a decision tree from data is a difficult optimization problem. The most widespread algorithm in practice, dating to the 1980s, is based on a greedy growth of the tree structure by recursively splitting nodes, and possibly pruning back the final tree. The parameters (decision function) of an internal node are approximately estimated by minimizing an impurity measure. We give an algorithm that, given an input tree (its structure and the parameter values at its nodes), produces a new tree with the same or smaller structure but new parameter values that provably lower or leave unchanged the misclassification error. This can be applied to both axis-aligned and oblique trees and our experiments show it consistently outperforms various other algorithms while being highly scalable to large datasets and trees. 
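The "Almost Optimal Algorithms for Linear Stochastic Bandits with Heavy-Tailed Payoffs" entry above builds on median of means as its robust-estimation primitive. A minimal sketch of that primitive alone; the bandit-specific allocation and truncation machinery from the paper is omitted:

```python
import numpy as np

def median_of_means(samples, k):
    # Split the samples into k groups, average each group, and return the
    # median of the group means; far less sensitive to heavy tails than
    # the plain empirical mean.
    groups = np.array_split(np.asarray(samples), k)
    return float(np.median([g.mean() for g in groups]))

# Toy usage: heavy-tailed rewards with finite mean but infinite variance.
rng = np.random.default_rng(0)
rewards = rng.pareto(1.5, size=10_000)
print(rewards.mean(), median_of_means(rewards, k=30))
```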
Further, the same algorithm can handle a sparsity penalty, so it can learn sparse oblique trees, having a structure that is a subset of the original tree and few nonzero parameters. This combines the best of axis-aligned and oblique trees: flexibility to model correlated data, low generalization error, fast inference and interpretable nodes that involve only a few features in their decision.", "bibtex": "@inproceedings{NEURIPS2018_185c29dc,\n author = {Carreira-Perpinan, Miguel A. and Tavallali, Pooya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Alternating optimization of decision trees, with application to learning sparse oblique trees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/185c29dc24325934ee377cfda20e414c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/185c29dc24325934ee377cfda20e414c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/185c29dc24325934ee377cfda20e414c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/185c29dc24325934ee377cfda20e414c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/185c29dc24325934ee377cfda20e414c-Reviews.html", "metareview": "", "pdf_size": 828684, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17990550332462203008&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Dept. EECS, University of California, Merced; Dept. EECS, University of California, Merced", "aff_domain": "ucmerced.edu;ucmerced.edu", "email": "ucmerced.edu;ucmerced.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/185c29dc24325934ee377cfda20e414c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Merced", "aff_unique_dep": "Dept. of Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.ucmerced.edu", "aff_unique_abbr": "UC Merced", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Amortized Inference Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11434", "id": "11434", "author_site": "Rui Shu, Hung Bui, Shengjia Zhao, Mykel J Kochenderfer, Stefano Ermon", "author": "Rui Shu; Hung H Bui; Shengjia Zhao; Mykel J Kochenderfer; Stefano Ermon", "abstract": "The variational autoencoder (VAE) is a popular model for density estimation and representation learning. Canonically, the variational principle suggests to prefer an expressive inference model so that the variational approximation is accurate. However, it is often overlooked that an overly-expressive inference model can be detrimental to the test set performance of both the amortized posterior approximator and, more importantly, the generative density estimator. In this paper, we leverage the fact that VAEs rely on amortized inference and propose techniques for amortized inference regularization (AIR) that control the smoothness of the inference model. We demonstrate that, by applying AIR, it is possible to improve VAE generalization on both inference and generative performance. 
Our paper challenges the belief that amortized inference is simply a mechanism for approximating maximum likelihood training and illustrates that regularization of the amortization family provides a new direction for understanding and improving generalization in VAEs.", "bibtex": "@inproceedings{NEURIPS2018_1819932f,\n author = {Shu, Rui and Bui, Hung H and Zhao, Shengjia and Kochenderfer, Mykel J and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Amortized Inference Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1819932ff5cf474f4f19e7c7024640c2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1819932ff5cf474f4f19e7c7024640c2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1819932ff5cf474f4f19e7c7024640c2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1819932ff5cf474f4f19e7c7024640c2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1819932ff5cf474f4f19e7c7024640c2-Reviews.html", "metareview": "", "pdf_size": 681741, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3461308530187068761&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Stanford University; DeepMind; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;google.com;stanford.edu;stanford.edu;cs.stanford.edu", "email": "stanford.edu;google.com;stanford.edu;stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1819932ff5cf474f4f19e7c7024640c2-Abstract.html", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Stanford University;DeepMind", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com", "aff_unique_abbr": "Stanford;DeepMind", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "An Efficient Pruning Algorithm for Robust Isotonic Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11048", "id": "11048", "author": "Cong Han Lim", "abstract": "We study a generalization of the classic isotonic regression problem where we allow separable nonconvex objective functions, focusing on the case of estimators used in robust regression. A simple dynamic programming approach allows us to solve this problem to within \u03b5-accuracy (of the global minimum) in time linear in 1/\u03b5 and the dimension. We can combine techniques from the convex case with branch-and-bound ideas to form a new algorithm for this problem that naturally exploits the shape of the objective function. Our algorithm achieves the best bounds for both the general nonconvex and convex case (linear in log (1/\u03b5)), while performing much faster in practice than a straightforward dynamic programming approach, especially as the desired accuracy increases.", "bibtex": "@inproceedings{NEURIPS2018_96da2f59,\n author = {Lim, Cong Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Efficient Pruning Algorithm for Robust Isotonic Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/96da2f590cd7246bbde0051047b0d6f7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/96da2f590cd7246bbde0051047b0d6f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/96da2f590cd7246bbde0051047b0d6f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/96da2f590cd7246bbde0051047b0d6f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/96da2f590cd7246bbde0051047b0d6f7-Reviews.html", "metareview": "", "pdf_size": 508233, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=322878459574213055&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/96da2f590cd7246bbde0051047b0d6f7-Abstract.html" }, { "title": "An Improved Analysis of Alternating Minimization for Structured Multi-Response Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11639", "id": "11639", "author_site": "Sheng Chen, Arindam Banerjee", "author": "Sheng Chen; Arindam Banerjee", "abstract": "Multi-response linear models aggregate a set of vanilla linear models by assuming correlated noise across them, which has an unknown covariance structure. To find the coefficient vector, estimators with a joint approximation of the noise covariance are often preferred over simple linear regression in view of their superior empirical performance; such joint estimators can generally be solved by alternating-minimization type procedures. Due to the non-convex nature of such joint estimators, the theoretical justification of their efficiency is typically challenging. The existing analyses fail to fully explain the empirical observations due to the assumption of resampling on the alternating procedures, which requires access to fresh samples in each iteration. In this work, we present a resampling-free analysis for the alternating minimization algorithm applied to the multi-response regression. In particular, we focus on the high-dimensional setting of multi-response linear models with structured coefficient parameter, and the statistical error of the parameter can be expressed by the complexity measure, Gaussian width, which is related to the assumed structure. More importantly, to the best of our knowledge, our result reveals for the first time that the alternating minimization with random initialization can achieve the same performance as the well-initialized one when solving this multi-response regression problem. Experimental results support our theoretical developments.", "bibtex": "@inproceedings{NEURIPS2018_59a3adea,\n author = {Chen, Sheng and Banerjee, Arindam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
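For the "An Efficient Pruning Algorithm for Robust Isotonic Regression" entry above, the baseline the paper accelerates is a simple dynamic program over a grid of candidate levels. A minimal sketch of that DP, assuming a separable robust loss `rho`; the paper's pruning and branch-and-bound refinements are omitted:

```python
import numpy as np

def robust_isotonic_dp(y, rho, n_grid=200):
    # Fit a nondecreasing sequence z minimizing sum_i rho(y_i - z_i), with the
    # levels restricted to a grid; a finer grid gives higher accuracy.
    grid = np.linspace(min(y), max(y), n_grid)   # candidate level values
    dp = np.zeros(n_grid)                        # dp[v] = best cost ending at level v
    for yi in y:
        # prefix-min enforces monotonicity z_{i-1} <= z_i before adding cost
        dp = np.minimum.accumulate(dp) + rho(yi - grid)
    return float(dp.min())

# Toy usage with Tukey's biweight, a bounded (nonconvex) robust loss.
biweight = lambda r, c=1.0: np.where(np.abs(r) < c, 1 - (1 - (r / c) ** 2) ** 3, 1.0)
print(robust_isotonic_dp(np.array([0.1, 0.3, 5.0, 0.4, 0.6]), biweight))
```

Because the loss is bounded, the gross outlier at 5.0 costs at most 1.0 instead of dragging the whole fit upward, which is the point of the robust estimators the abstract mentions.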
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Improved Analysis of Alternating Minimization for Structured Multi-Response Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/59a3adea76fadcb6dd9e54c96fc155d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/59a3adea76fadcb6dd9e54c96fc155d1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/59a3adea76fadcb6dd9e54c96fc155d1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/59a3adea76fadcb6dd9e54c96fc155d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/59a3adea76fadcb6dd9e54c96fc155d1-Reviews.html", "metareview": "", "pdf_size": 497666, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15503111260314896530&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The Voleon Group + University of Minnesota, Twin Cities; Dept. of Computer Science & Engineering, University of Minnesota, Twin Cities", "aff_domain": "umn.edu;cs.umn.edu", "email": "umn.edu;cs.umn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/59a3adea76fadcb6dd9e54c96fc155d1-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Voleon Group;University of Minnesota", "aff_unique_dep": ";", "aff_unique_url": ";https://www.minnesota.edu", "aff_unique_abbr": ";UMN", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Twin Cities", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "An Information-Theoretic Analysis for Thompson Sampling with Many Actions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11412", "id": "11412", "author_site": "Shi Dong, Benjamin Van Roy", "author": "Shi Dong; Benjamin Van Roy", "abstract": "Information-theoretic Bayesian regret bounds of Russo and Van Roy capture the dependence of regret on prior uncertainty. However, this dependence is through entropy, which can become arbitrarily large as the number of actions increases. We establish new bounds that depend instead on a notion of rate-distortion. Among other things, this allows us to recover through information-theoretic arguments a near-optimal bound for the linear bandit. We also offer a bound for the logistic bandit that dramatically improves on the best previously available, though this bound depends on an information-theoretic statistic that we have only been able to quantify via computation.", "bibtex": "@inproceedings{NEURIPS2018_f3e52c30,\n author = {Dong, Shi and Van Roy, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Information-Theoretic Analysis for Thompson Sampling with Many Actions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f3e52c300b822a8123e7ace55fe15c08-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f3e52c300b822a8123e7ace55fe15c08-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f3e52c300b822a8123e7ace55fe15c08-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f3e52c300b822a8123e7ace55fe15c08-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f3e52c300b822a8123e7ace55fe15c08-Reviews.html", "metareview": "", "pdf_size": 706972, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18333052337196651631&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f3e52c300b822a8123e7ace55fe15c08-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An Off-policy Policy Gradient Theorem Using Emphatic Weightings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11037", "id": "11037", "author_site": "Ehsan Imani, Eric Graves, Martha White", "author": "Ehsan Imani; Eric Graves; Martha White", "abstract": "Policy gradient methods are widely used for control in reinforcement learning, particularly for the continuous action setting. There have been a host of theoretically sound algorithms proposed for the on-policy setting, due to the existence of the policy gradient theorem which provides a simplified form for the gradient. In off-policy learning, however, where the behaviour policy is not necessarily attempting to learn and follow the optimal policy for the given task, the existence of such a theorem has been elusive. In this work, we solve this open problem by providing the first off-policy policy gradient theorem. The key to the derivation is the use of emphatic weightings. We develop a new actor-critic algorithm\u2014called Actor Critic with Emphatic weightings (ACE)\u2014that approximates the simplified gradients provided by the theorem. We demonstrate in a simple counterexample that previous off-policy policy gradient methods\u2014particularly OffPAC and DPG\u2014converge to the wrong solution whereas ACE finds the optimal solution.", "bibtex": "@inproceedings{NEURIPS2018_3ef81541,\n author = {Imani, Ehsan and Graves, Eric and White, Martha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3ef815416f775098fe977004015c6193-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3ef815416f775098fe977004015c6193-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3ef815416f775098fe977004015c6193-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3ef815416f775098fe977004015c6193-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3ef815416f775098fe977004015c6193-Reviews.html", "metareview": "", "pdf_size": 1522138, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8376753720724526030&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "aff": "Reinforcement Learning and Artificial Intelligence Laboratory, Department of Computing Science, University of Alberta; Reinforcement Learning and Artificial Intelligence Laboratory, Department of Computing Science, University of Alberta; Reinforcement Learning and Artificial Intelligence Laboratory, Department of Computing Science, University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca", "email": "ualberta.ca;ualberta.ca;ualberta.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3ef815416f775098fe977004015c6193-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "Department of Computing Science", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "An intriguing failing of convolutional neural networks and the CoordConv solution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11913", "id": "11913", "author_site": "Rosanne Liu, Joel Lehman, Piero Molino, Felipe Petroski Such, Eric Frank, Alex Sergeev, Jason Yosinski", "author": "Rosanne Liu; Joel Lehman; Piero Molino; Felipe Petroski Such; Eric Frank; Alex Sergeev; Jason Yosinski", "abstract": "Few ideas have enjoyed as large an impact on deep learning as convolution. For any problem involving pixels or spatial representations, common intuition holds that convolutional neural networks may be appropriate. In this paper we show a striking counterexample to this intuition via the seemingly trivial coordinate transform problem, which simply requires learning a mapping between coordinates in (x,y) Cartesian space and coordinates in one-hot pixel space. Although convolutional networks would seem appropriate for this task, we show that they fail spectacularly. We demonstrate and carefully analyze the failure first on a toy problem, at which point a simple fix becomes obvious. We call this solution CoordConv, which works by giving convolution access to its own input coordinates through the use of extra coordinate channels. Without sacrificing the computational and parametric efficiency of ordinary convolution, CoordConv allows networks to learn either complete translation invariance or varying degrees of translation dependence, as required by the end task. 
CoordConv solves the coordinate transform problem with perfect generalization, 150 times faster and with 10--100 times fewer parameters than convolution. This stark contrast raises the question: to what extent has this inability of convolution persisted insidiously inside other tasks, subtly hampering performance from within? A complete answer to this question will require further investigation, but we show preliminary evidence that swapping convolution for CoordConv can improve models on a diverse set of tasks. Using CoordConv in a GAN produced less mode collapse as the transform between high-level spatial latents and pixels became easier to learn. A Faster R-CNN detection model trained on MNIST detection showed 24% better IOU when using CoordConv, and in the Reinforcement Learning (RL) domain agents playing Atari games benefit significantly from the use of CoordConv layers.", "bibtex": "@inproceedings{NEURIPS2018_60106888,\n author = {Liu, Rosanne and Lehman, Joel and Molino, Piero and Petroski Such, Felipe and Frank, Eric and Sergeev, Alex and Yosinski, Jason},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An intriguing failing of convolutional neural networks and the CoordConv solution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/60106888f8977b71e1f15db7bc9a88d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/60106888f8977b71e1f15db7bc9a88d1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/60106888f8977b71e1f15db7bc9a88d1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/60106888f8977b71e1f15db7bc9a88d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/60106888f8977b71e1f15db7bc9a88d1-Reviews.html", "metareview": "", "pdf_size": 7543764, "gs_citation": 1113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1725137104710452960&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Uber AI Labs, San Francisco, CA, USA; Uber AI Labs, San Francisco, CA, USA; Uber AI Labs, San Francisco, CA, USA; Uber AI Labs, San Francisco, CA, USA; Uber AI Labs, San Francisco, CA, USA; Uber Technologies, Seattle, WA, USA; Uber AI Labs, San Francisco, CA, USA", "aff_domain": "uber.com;uber.com;uber.com;uber.com;uber.com;uber.com;uber.com", "email": "uber.com;uber.com;uber.com;uber.com;uber.com;uber.com;uber.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/60106888f8977b71e1f15db7bc9a88d1-Abstract.html", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Uber AI Labs;Uber Technologies", "aff_unique_dep": "AI Labs;", "aff_unique_url": "https://www.uber.com;https://www.uber.com", "aff_unique_abbr": "Uber AI Labs;Uber", "aff_campus_unique_index": "0;0;0;0;0;1;0", "aff_campus_unique": "San Francisco;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Analysis of Krylov Subspace Solutions of Regularized Non-Convex Quadratic Problems", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12012", "id": "12012", "author_site": "Yair Carmon, John Duchi", "author": "Yair Carmon; John C. 
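The mechanism in the CoordConv entry above is easy to make concrete: append coordinate channels to the input so that a subsequent ordinary convolution can condition on position. A minimal NumPy sketch; the [-1, 1] scaling and the function name are one reasonable choice, not necessarily the paper's exact normalization:

```python
import numpy as np

def add_coord_channels(batch):
    # Concatenate two extra channels holding each pixel's (row, col)
    # coordinates, scaled to [-1, 1].  batch: array of shape (N, C, H, W).
    n, _, h, w = batch.shape
    ii = np.linspace(-1.0, 1.0, h).reshape(1, 1, h, 1).repeat(w, axis=3)
    jj = np.linspace(-1.0, 1.0, w).reshape(1, 1, 1, w).repeat(h, axis=2)
    coords = np.concatenate([ii, jj], axis=1)          # (1, 2, H, W)
    coords = np.broadcast_to(coords, (n, 2, h, w))
    return np.concatenate([batch, coords], axis=1)     # (N, C+2, H, W)

x = np.zeros((4, 3, 8, 8))
print(add_coord_channels(x).shape)                     # (4, 5, 8, 8)
```

A convolution applied to the augmented tensor can then learn anything from full translation invariance (ignore the coordinate channels) to strong position dependence, which is the flexibility the abstract emphasizes.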
Duchi", "abstract": "We provide convergence rates for Krylov subspace solutions to the trust-region and cubic-regularized (nonconvex) quadratic problems. Such solutions may be efficiently computed by the Lanczos method and have long been used in practice. We prove error bounds of the form $1/t^2$ and $e^{-4t/\\sqrt{\\kappa}}$, where $\\kappa$ is a condition number for the problem, and $t$ is the Krylov subspace order (number of Lanczos iterations). We also provide lower bounds showing that our analysis is sharp.", "bibtex": "@inproceedings{NEURIPS2018_349f36aa,\n author = {Carmon, Yair and Duchi, John C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Analysis of Krylov Subspace Solutions of Regularized Non-Convex Quadratic Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/349f36aa789af083b8e26839bd498af9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/349f36aa789af083b8e26839bd498af9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/349f36aa789af083b8e26839bd498af9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/349f36aa789af083b8e26839bd498af9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/349f36aa789af083b8e26839bd498af9-Reviews.html", "metareview": "", "pdf_size": 1476322, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13560608438503219037&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Electrical Engineering, Stanford University; Departments of Statistics and Electrical Engineering, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/349f36aa789af083b8e26839bd498af9-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Analytic solution and stationary phase approximation for the Bayesian lasso and elastic net", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11284", "id": "11284", "author": "Tom Michoel", "abstract": "The lasso and elastic net linear regression models impose a double-exponential prior distribution on the model parameters to achieve regression shrinkage and variable selection, allowing the inference of robust models from large data sets. However, there has been limited success in deriving estimates for the full posterior distribution of regression coefficients in these models, due to a need to evaluate analytically intractable partition function integrals. Here, the Fourier transform is used to express these integrals as complex-valued oscillatory integrals over \"regression frequencies\". This results in an analytic expansion and stationary phase approximation for the partition functions of the Bayesian lasso and elastic net, where the non-differentiability of the double-exponential prior has so far eluded such an approach. 
Use of this approximation leads to highly accurate numerical estimates for the expectation values and marginal posterior distributions of the regression coefficients, and allows for Bayesian inference of much higher dimensional models than previously possible.", "bibtex": "@inproceedings{NEURIPS2018_0245952e,\n author = {Michoel, Tom},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Analytic solution and stationary phase approximation for the Bayesian lasso and elastic net},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0245952ecff55018e2a459517fdb40e3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0245952ecff55018e2a459517fdb40e3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0245952ecff55018e2a459517fdb40e3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0245952ecff55018e2a459517fdb40e3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0245952ecff55018e2a459517fdb40e3-Reviews.html", "metareview": "", "pdf_size": 623286, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14797747024232630376&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The Roslin Institute, The University of Edinburgh, UK+Computational Biology Unit, Department of Informatics, University of Bergen, Norway", "aff_domain": "uib.no", "email": "uib.no", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0245952ecff55018e2a459517fdb40e3-Abstract.html", "aff_unique_index": "0+1", "aff_unique_norm": "University of Edinburgh;University of Bergen", "aff_unique_dep": "The Roslin Institute;Department of Informatics", "aff_unique_url": "https://www.ed.ac.uk/roslin;https://www.uib.no", "aff_unique_abbr": "Edinburgh;UIB", "aff_campus_unique_index": "0", "aff_campus_unique": "Edinburgh;", "aff_country_unique_index": "0+1", "aff_country_unique": "United Kingdom;Norway" }, { "title": "Answerer in Questioner's Mind: Information Theoretic Approach to Goal-Oriented Visual Dialog", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11266", "id": "11266", "author_site": "Sang-Woo Lee, Yu-Jung Heo, Byoung-Tak Zhang", "author": "Sang-Woo Lee; Yu-Jung Heo; Byoung-Tak Zhang", "abstract": "Goal-oriented dialog has been given attention due to its numerous applications in artificial intelligence.\nGoal-oriented dialogue tasks occur when a questioner asks an action-oriented question and an answerer responds with the intent of letting the questioner know a correct action to take. \nTo ask the adequate question, deep learning and reinforcement learning have been recently applied. \nHowever, these approaches struggle to find a competent recurrent neural questioner, owing to the complexity of learning a series of sentences.\nMotivated by theory of mind, we propose \"Answerer in Questioner's Mind\" (AQM), a novel information theoretic algorithm for goal-oriented dialog. 
\nWith AQM, a questioner asks and infers based on an approximated probabilistic model of the answerer.\nThe questioner figures out the answerer\u2019s intention via selecting a plausible question by explicitly calculating the information gain of the candidate intentions and possible answers to each question.\nWe test our framework on two goal-oriented visual dialog tasks: "MNIST Counting Dialog" and "GuessWhat?!".\nIn our experiments, AQM outperforms comparative algorithms by a large margin.", "bibtex": "@inproceedings{NEURIPS2018_0829424f,\n author = {Lee, Sang-Woo and Heo, Yu-Jung and Zhang, Byoung-Tak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Answerer in Questioner\\textquotesingle s Mind: Information Theoretic Approach to Goal-Oriented Visual Dialog},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0829424ffa0d3a2547b6c9622c77de03-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0829424ffa0d3a2547b6c9622c77de03-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0829424ffa0d3a2547b6c9622c77de03-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0829424ffa0d3a2547b6c9622c77de03-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0829424ffa0d3a2547b6c9622c77de03-Reviews.html", "metareview": "", "pdf_size": 3920203, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17284060182601005851&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Clova AI Research, Naver Corp; Seoul National University; Seoul National University+Surromind Robotics", "aff_domain": "naver.com;snu.ac.kr;snu.ac.kr", "email": "naver.com;snu.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0829424ffa0d3a2547b6c9622c77de03-Abstract.html", "aff_unique_index": "0;1;1+2", "aff_unique_norm": "Naver Corp;Seoul National University;Surromind Robotics", "aff_unique_dep": "Clova AI Research;;", "aff_unique_url": "https://www.naver.com;https://www.snu.ac.kr;", "aff_unique_abbr": "Naver;SNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea;" }, { "title": "Approximate Knowledge Compilation by Online Collapsed Importance Sampling", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11769", "id": "11769", "author_site": "Tal Friedman, Guy Van den Broeck", "author": "Tal Friedman; Guy Van den Broeck", "abstract": "We introduce collapsed compilation, a novel approximate inference algorithm for discrete probabilistic graphical models. It is a collapsed sampling algorithm that incrementally selects which variable to sample next based on the partial compilation obtained so far. This online collapsing, together with knowledge compilation inference on the remaining variables, naturally exploits local structure and context-specific independence in the distribution. These properties are used implicitly in exact inference, but are difficult to harness for approximate inference. Moreover, by having a partially compiled circuit available during sampling, collapsed compilation has access to a highly effective proposal distribution for importance sampling. 
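For the "Answerer in Questioner's Mind" entry above, the question-selection rule is an explicit information-gain computation under the approximate answerer model. A minimal sketch for scoring one candidate question and picking the best; array names and shapes are illustrative, not from the paper's code:

```python
import numpy as np

def information_gain(p_class, p_ans_given_class, tiny=1e-12):
    # Mutual information I(intention; answer) for one candidate question.
    # p_class:           shape (K,),  posterior over the answerer's intentions
    # p_ans_given_class: shape (K, A), approximate answerer model's answer
    #                    probabilities for this question
    p_ans = p_class @ p_ans_given_class              # marginal over answers
    ratio = p_ans_given_class / np.maximum(p_ans, tiny)
    return float(np.sum(p_class[:, None] * p_ans_given_class
                        * np.log(np.maximum(ratio, tiny))))

def select_question(p_class, answer_models):
    # Ask the question whose simulated answer is most informative.
    return int(np.argmax([information_gain(p_class, m) for m in answer_models]))
```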
Our experimental evaluation shows that collapsed compilation performs well on standard benchmarks. In particular, when the amount of exact inference is equally limited, collapsed compilation is competitive with the state of the art, and outperforms it on several benchmarks.", "bibtex": "@inproceedings{NEURIPS2018_4f164cf2,\n author = {Friedman, Tal and Van den Broeck, Guy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Approximate Knowledge Compilation by Online Collapsed Importance Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4f164cf233807fc02da06599a1264dee-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4f164cf233807fc02da06599a1264dee-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4f164cf233807fc02da06599a1264dee-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4f164cf233807fc02da06599a1264dee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4f164cf233807fc02da06599a1264dee-Reviews.html", "metareview": "", "pdf_size": 463740, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5801857808795259088&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095", "aff_domain": "cs.ucla.edu;cs.ucla.edu", "email": "cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4f164cf233807fc02da06599a1264dee-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Approximating Real-Time Recurrent Learning with Random Kronecker Factors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11637", "id": "11637", "author_site": "Asier Mujika, Florian Meier, Angelika Steger", "author": "Asier Mujika; Florian Meier; Angelika Steger", "abstract": "Despite all the impressive advances of recurrent neural networks, sequential data is still in need of better modelling. Truncated backpropagation through time (TBPTT), the learning algorithm most widely used in practice, suffers from the truncation bias, which drastically limits its ability to learn long-term dependencies. The Real Time Recurrent Learning algorithm (RTRL) addresses this issue, but its high computational requirements make it infeasible in practice. The Unbiased Online Recurrent Optimization algorithm (UORO) approximates RTRL with a smaller runtime and memory cost, but with the disadvantage of obtaining noisy gradients that also limit its practical applicability. In this paper we propose the Kronecker Factored RTRL (KF-RTRL) algorithm that uses a Kronecker product decomposition to approximate the gradients for a large class of RNNs. We show that KF-RTRL is an unbiased and memory efficient online learning algorithm.
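A small numpy sketch of why a Kronecker factorization pays off here: storing the two factors is far cheaper than storing the full matrix they induce. The shapes are illustrative, and this shows only the storage idea, not the KF-RTRL update itself.

```python
import numpy as np

h = 64                            # hidden units (illustrative)
u = np.random.randn(h + 1)        # vector factor
A = np.random.randn(h, h)         # matrix factor

# Storing the factors costs O(h^2); the full matrix they induce has
# h * h * (h + 1) entries and need never be materialised during training.
G = np.kron(u[None, :], A)        # shape (h, h * (h + 1)), built here only to check
print(G.shape, G.size, u.size + A.size)   # (64, 4160) 266240 4161
```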
Our theoretical analysis shows that, under reasonable assumptions, the noise introduced by our algorithm is not only stable over time but also asymptotically much smaller than the one of the UORO algorithm. We also confirm these theoretical results experimentally. Further, we show empirically that the KF-RTRL algorithm captures long-term dependencies and almost matches the performance of TBPTT on real world tasks by training Recurrent Highway Networks on a synthetic string memorization task and on the Penn TreeBank task, respectively. These results indicate that RTRL based approaches might be a promising future alternative to TBPTT.", "bibtex": "@inproceedings{NEURIPS2018_dba132f6,\n author = {Mujika, Asier and Meier, Florian and Steger, Angelika},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Approximating Real-Time Recurrent Learning with Random Kronecker Factors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dba132f6ab6a3e3d17a8d59e82105f4c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dba132f6ab6a3e3d17a8d59e82105f4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dba132f6ab6a3e3d17a8d59e82105f4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dba132f6ab6a3e3d17a8d59e82105f4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dba132f6ab6a3e3d17a8d59e82105f4c-Reviews.html", "metareview": "", "pdf_size": 589879, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17338493045921981901&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, ETH Z\u00fcrich, Switzerland; Department of Computer Science, ETH Z\u00fcrich, Switzerland; Department of Computer Science, ETH Z\u00fcrich, Switzerland", "aff_domain": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dba132f6ab6a3e3d17a8d59e82105f4c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Approximation algorithms for stochastic clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11586", "id": "11586", "author_site": "David Harris, Shi Li, Aravind Srinivasan, Khoa Trinh, Thomas Pensyl", "author": "David Harris; Shi Li; Aravind Srinivasan; Khoa Trinh; Thomas Pensyl", "abstract": "We consider stochastic settings for clustering, and develop provably-good (approximation) algorithms for a number of these notions. These algorithms allow one to obtain better approximation ratios compared to the usual deterministic clustering setting. Additionally, they offer a number of advantages including providing fairer clustering and clustering which has better long-term behavior for each user. 
In particular, they ensure that", "bibtex": "@inproceedings{NEURIPS2018_3e60e09c,\n author = {Harris, David and Li, Shi and Srinivasan, Aravind and Trinh, Khoa and Pensyl, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Approximation algorithms for stochastic clustering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3e60e09c222f206c725385f53d7e567c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3e60e09c222f206c725385f53d7e567c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3e60e09c222f206c725385f53d7e567c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3e60e09c222f206c725385f53d7e567c-Reviews.html", "metareview": "", "pdf_size": 285998, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15745099801435192239&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, University of Maryland, College Park, MD 20742; University at Buffalo, Buffalo, NY; Bandwidth, Inc., Raleigh, NC; Department of Computer Science and Institute for Advanced Computer Studies, University of Maryland, College Park, MD 20742; Google, Mountain View, CA 94043", "aff_domain": "gmail.com;buffalo.edu;bandwidth.com;cs.umd.edu;google.com", "email": "gmail.com;buffalo.edu;bandwidth.com;cs.umd.edu;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3e60e09c222f206c725385f53d7e567c-Abstract.html", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Maryland;University at Buffalo;Bandwidth, Inc.;Google", "aff_unique_dep": "Department of Computer Science;;;Google", "aff_unique_url": "https://www/umd.edu;https://www.buffalo.edu;https://www.bandwidth.com;https://www.google.com", "aff_unique_abbr": "UMD;UB;;Google", "aff_campus_unique_index": "0;1;0;3", "aff_campus_unique": "College Park;Buffalo;;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are GANs Created Equal? A Large-Scale Study", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11092", "id": "11092", "author_site": "Mario Lucic, Karol Kurach, Marcin Michalski, Sylvain Gelly, Olivier Bousquet", "author": "Mario Lucic; Karol Kurach; Marcin Michalski; Sylvain Gelly; Olivier Bousquet", "abstract": "Generative adversarial networks (GAN) are a powerful subclass of generative models. Despite a very rich research activity leading to numerous interesting GAN algorithms, it is still very hard to assess which algorithm(s) perform better than others. We conduct a neutral, multi-faceted large-scale empirical study on state-of-the art models and evaluation measures. We find that most models can reach similar scores with enough hyperparameter optimization and random restarts. This suggests that improvements can arise from a higher computational budget and tuning more than fundamental algorithmic changes. To overcome some limitations of the current metrics, we also propose several data sets on which precision and recall can be computed. Our experimental results suggest that future GAN research should be based on more systematic and objective evaluation procedures. 
Finally, we did not find evidence that any of the tested algorithms consistently outperforms the non-saturating GAN introduced in \\cite{goodfellow2014generative}.", "bibtex": "@inproceedings{NEURIPS2018_e46de7e1,\n author = {Lucic, Mario and Kurach, Karol and Michalski, Marcin and Gelly, Sylvain and Bousquet, Olivier},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Are GANs Created Equal? A Large-Scale Study},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e46de7e1bcaaced9a54f1e9d0d2f800d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e46de7e1bcaaced9a54f1e9d0d2f800d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e46de7e1bcaaced9a54f1e9d0d2f800d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e46de7e1bcaaced9a54f1e9d0d2f800d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e46de7e1bcaaced9a54f1e9d0d2f800d-Reviews.html", "metareview": "", "pdf_size": 1329243, "gs_citation": 1329, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3229217754457345915&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Google Brain; Google Brain; Google Brain; Google Brain; Google Brain", "aff_domain": "google.com;google.com; ; ; ", "email": "google.com;google.com; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e46de7e1bcaaced9a54f1e9d0d2f800d-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Are ResNets Provably Better than Linear Predictors?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11074", "id": "11074", "author": "Ohad Shamir", "abstract": "A residual network (or ResNet) is a standard deep neural net architecture, with state-of-the-art performance across numerous applications. The main premise of ResNets is that they allow the training of each layer to focus on fitting just the residual of the previous layer's output and the target output. Thus, we should expect that the trained network is no worse than what we can obtain if we remove the residual layers and train a shallower network instead. However, due to the non-convexity of the optimization problem, it is not at all clear that ResNets indeed achieve this behavior, rather than getting stuck at some arbitrarily poor local minimum. In this paper, we rigorously prove that arbitrarily deep, nonlinear residual units indeed exhibit this behavior, in the sense that the optimization landscape contains no local minima with value above what can be obtained with a linear predictor (namely a 1-layer network). Notably, we show this under minimal or no assumptions on the precise network architecture, data distribution, or loss function used. We also provide a quantitative analysis of approximate stationary points for this problem. 
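The premise stated above is easy to check numerically: zeroing the residual branches turns a deep stack of residual units into the identity map, so a residual network with a linear head can always realize any linear predictor. A minimal numpy sketch (the unit shape is illustrative):

```python
import numpy as np

rng = np.random.default_rng(1)
x = rng.standard_normal((5, 8))     # batch of inputs
w = rng.standard_normal(8)          # an arbitrary linear predictor

def res_unit(x, W1, W2):
    return x + np.maximum(x @ W1, 0.0) @ W2   # x + ReLU(x W1) W2

# With every residual branch zeroed the stack is the identity map, so the
# optimization landscape always contains the linear predictor as a point:
h = x
for _ in range(10):
    h = res_unit(h, rng.standard_normal((8, 16)), np.zeros((16, 8)))

print(np.allclose(h @ w, x @ w))    # True
```

The paper's contribution is the harder claim that no local minimum of the landscape is worse than this point; the sketch only shows the point exists.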
Finally, we show that with a certain tweak to the architecture, training the network with standard stochastic gradient descent achieves an objective value close or better than any linear predictor.", "bibtex": "@inproceedings{NEURIPS2018_26e359e8,\n author = {Shamir, Ohad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Are ResNets Provably Better than Linear Predictors?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/26e359e83860db1d11b6acca57d8ea88-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/26e359e83860db1d11b6acca57d8ea88-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/26e359e83860db1d11b6acca57d8ea88-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/26e359e83860db1d11b6acca57d8ea88-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/26e359e83860db1d11b6acca57d8ea88-Reviews.html", "metareview": "", "pdf_size": 397018, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9883796523366053778&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science and Applied Mathematics, Weizmann Institute of Science, Rehovot, Israel", "aff_domain": "weizmann.ac.il", "email": "weizmann.ac.il", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/26e359e83860db1d11b6acca57d8ea88-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "Department of Computer Science and Applied Mathematics", "aff_unique_url": "https://www.weizmann.ac.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "0", "aff_campus_unique": "Rehovot", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Assessing Generative Models via Precision and Recall", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11511", "id": "11511", "author_site": "Mehdi S. M. Sajjadi, Olivier Bachem, Mario Lucic, Olivier Bousquet, Sylvain Gelly", "author": "Mehdi S. M. Sajjadi; Olivier Bachem; Mario Lucic; Olivier Bousquet; Sylvain Gelly", "abstract": "Recent advances in generative modeling have led to an increased interest in the study of statistical divergences as means of model comparison. Commonly used evaluation methods, such as the Frechet Inception Distance (FID), correlate well with the perceived quality of samples and are sensitive to mode dropping. However, these metrics are unable to distinguish between different failure cases since they only yield one-dimensional scores. We propose a novel definition of precision and recall for distributions which disentangles the divergence into two separate dimensions. The proposed notion is intuitive, retains desirable properties, and naturally leads to an efficient algorithm that can be used to evaluate generative models. We relate this notion to total variation as well as to recent evaluation metrics such as Inception Score and FID. To demonstrate the practical utility of the proposed approach we perform an empirical study on several variants of Generative Adversarial Networks and Variational Autoencoders. 
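A sketch of how such a two-dimensional comparison can be computed for a pair of discrete distributions, using the min-over-bins pairs $\alpha(\lambda) = \sum_i \min(\lambda\, p_i, q_i)$ and $\beta(\lambda) = \alpha(\lambda)/\lambda$ traced over a grid of $\lambda$. This is one reading of the construction, with the histograms assumed given (e.g., from clustering sample features):

```python
import numpy as np

def prd_pairs(ref, model, num_angles=201):
    """Precision/recall-style pairs for two histograms that each sum to 1."""
    lambdas = np.tan(np.linspace(1e-6, np.pi / 2 - 1e-6, num_angles))
    alpha = np.array([np.minimum(lam * ref, model).sum() for lam in lambdas])
    beta = alpha / lambdas          # summing min(ref, model/lam) equals alpha/lam
    return alpha, beta

# Toy failure case: the model drops two of four equally likely modes.
ref = np.array([0.25, 0.25, 0.25, 0.25])
model = np.array([0.50, 0.50, 0.00, 0.00])
alpha, beta = prd_pairs(ref, model)
print(round(float(alpha.max()), 2), round(float(beta.max()), 2))
# -> 1.0 0.5: all model mass sits on real modes (precision-like coordinate),
#    but only half of the reference is covered (recall-like coordinate).
```

A one-dimensional score would conflate these two failure directions; the pair keeps mode dropping and off-manifold mass distinguishable.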
In an extensive set of experiments we show that the proposed metric is able to disentangle the quality of generated samples from the coverage of the target distribution.", "bibtex": "@inproceedings{NEURIPS2018_f7696a9b,\n author = {Sajjadi, Mehdi S. M. and Bachem, Olivier and Lucic, Mario and Bousquet, Olivier and Gelly, Sylvain},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Assessing Generative Models via Precision and Recall},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f7696a9b362ac5a51c3dc8f098b73923-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f7696a9b362ac5a51c3dc8f098b73923-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f7696a9b362ac5a51c3dc8f098b73923-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f7696a9b362ac5a51c3dc8f098b73923-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f7696a9b362ac5a51c3dc8f098b73923-Reviews.html", "metareview": "", "pdf_size": 829035, "gs_citation": 704, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=651893942780229&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "MPI for Intelligent Systems, Max Planck ETH Center for Learning Systems; Google Brain; Google Brain; Google Brain; Google Brain", "aff_domain": "msajjadi.com;google.com;google.com; ; ", "email": "msajjadi.com;google.com;google.com; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f7696a9b362ac5a51c3dc8f098b73923-Abstract.html", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Google", "aff_unique_dep": "Intelligent Systems;Google Brain", "aff_unique_url": "https://www.mpi-is.mpg.de;https://brain.google.com", "aff_unique_abbr": "MPI-IS;Google Brain", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Germany;United States" }, { "title": "Assessing the Scalability of Biologically-Motivated Deep Learning Algorithms and Architectures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11892", "id": "11892", "author_site": "Sergey Bartunov, Adam Santoro, Blake Richards, Luke Marris, Geoffrey E Hinton, Timothy Lillicrap", "author": "Sergey Bartunov; Adam Santoro; Blake Richards; Luke Marris; Geoffrey E. Hinton; Timothy Lillicrap", "abstract": "The backpropagation of error algorithm (BP) is impossible to implement in a real brain. The recent success of deep networks in machine learning and AI, however, has inspired proposals for understanding how the brain might learn across multiple layers, and hence how it might approximate BP. As of yet, none of these proposals have been rigorously evaluated on tasks where BP-guided deep learning has proved critical, or in architectures more structured than simple fully-connected networks. Here we present results on scaling up biologically motivated models of deep learning on datasets which need deep networks with appropriate architectures to achieve good performance. 
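One family of such BP approximations, feedback alignment (FA, evaluated in the experiments listed next), replaces the transpose of the forward weights in the backward pass with a fixed random matrix, avoiding the weight-transport problem. A minimal numpy sketch on a toy regression problem, assuming a single hidden layer and illustrative sizes:

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 10))
y = X @ rng.standard_normal(10)                # toy regression target

W1 = 0.1 * rng.standard_normal((10, 32))
W2 = 0.1 * rng.standard_normal(32)
B = rng.standard_normal(32)                    # fixed random feedback weights

for _ in range(1000):
    h = np.tanh(X @ W1)
    err = h @ W2 - y
    dh = np.outer(err, B) * (1.0 - h ** 2)     # error routed through B, not W2.T
    W2 -= 0.01 * h.T @ err / len(X)
    W1 -= 0.01 * X.T @ dh / len(X)

print("MSE after training:", np.mean((np.tanh(X @ W1) @ W2 - y) ** 2))
```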
We present results on the MNIST, CIFAR-10, and ImageNet datasets and explore variants of target-propagation (TP) and feedback alignment (FA) algorithms, and explore performance in both fully- and locally-connected architectures. We also introduce weight-transport-free variants of difference target propagation (DTP) modified to remove backpropagation from the penultimate layer. Many of these algorithms perform well for MNIST, but for CIFAR and ImageNet we find that TP and FA variants perform significantly worse than BP, especially for networks composed of locally connected units, opening questions about whether new architectures and algorithms are required to scale these approaches. Our results and implementation details help establish baselines for biologically motivated deep learning schemes going forward.", "bibtex": "@inproceedings{NEURIPS2018_63c3ddcc,\n author = {Bartunov, Sergey and Santoro, Adam and Richards, Blake and Marris, Luke and Hinton, Geoffrey E and Lillicrap, Timothy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Assessing the Scalability of Biologically-Motivated Deep Learning Algorithms and Architectures},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/63c3ddcc7b23daa1e42dc41f9a44a873-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/63c3ddcc7b23daa1e42dc41f9a44a873-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/63c3ddcc7b23daa1e42dc41f9a44a873-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/63c3ddcc7b23daa1e42dc41f9a44a873-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/63c3ddcc7b23daa1e42dc41f9a44a873-Reviews.html", "metareview": "", "pdf_size": 509529, "gs_citation": 333, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17862085183234087999&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "DeepMind; DeepMind; University of Toronto; DeepMind; Google Brain; DeepMind+University College London", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/63c3ddcc7b23daa1e42dc41f9a44a873-Abstract.html", "aff_unique_index": "0;0;1;0;2;0+3", "aff_unique_norm": "DeepMind;University of Toronto;Google;University College London", "aff_unique_dep": ";;Google Brain;", "aff_unique_url": "https://deepmind.com;https://www.utoronto.ca;https://brain.google.com;https://www.ucl.ac.uk", "aff_unique_abbr": "DeepMind;U of T;Google Brain;UCL", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;1;0;2;0+0", "aff_country_unique": "United Kingdom;Canada;United States" }, { "title": "Asymptotic optimality of adaptive importance sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11318", "id": "11318", "author_site": "Fran\u00e7ois Portier, Bernard Delyon", "author": "Fran\u00e7ois Portier; Bernard Delyon", "abstract": "\\textit{Adaptive importance sampling} (AIS) uses past samples to update the \\textit{sampling policy} $q_t$ at each stage $t$. Each stage $t$ is formed with two steps : (i) to explore the space with $n_t$ points according to $q_t$ and (ii) to exploit the current amount of information to update the sampling policy. 
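A minimal sketch of this explore/exploit loop for a Gaussian sampling policy and a fixed unnormalized target, with a constant allocation policy $n_t$. The moment-matching update is one simple choice of exploitation step, not the paper's weighted AIS variant:

```python
import numpy as np

rng = np.random.default_rng(0)
target = lambda x: np.exp(-0.5 * (x - 3.0) ** 2)         # unnormalised N(3, 1)

mu, sigma = 0.0, 5.0                                     # initial sampling policy q_t
xs, ws = [], []
for t in range(20):                                      # stages t
    x = rng.normal(mu, sigma, size=100)                  # (i) explore with q_t
    q = np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
    xs.append(x); ws.append(target(x) / q)               # importance weights
    S, W = np.concatenate(xs), np.concatenate(ws)
    mu = np.sum(W * S) / W.sum()                         # (ii) exploit: refit q_{t+1}
    sigma = np.sqrt(np.sum(W * (S - mu) ** 2) / W.sum())

print(round(mu, 2), round(sigma, 2))                     # close to (3.0, 1.0)
```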
The very fundamental question raised in this paper concerns the behavior of empirical sums based on AIS. Without making any assumption on the \\textit{allocation policy} $n_t$, the theory developed involves no restriction on the split of computational resources between the explore (i) and the exploit (ii) step. It is shown that AIS is asymptotically optimal : the asymptotic behavior of AIS is the same as some ``oracle'' strategy that knows the targeted sampling policy from the beginning. From a practical perspective, weighted AIS is introduced, a new method that allows to forget poor samples from early stages.", "bibtex": "@inproceedings{NEURIPS2018_1bc0249a,\n author = {Portier, Fran\\c{c}ois and Delyon, Bernard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Asymptotic optimality of adaptive importance sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1bc0249a6412ef49b07fe6f62e6dc8de-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1bc0249a6412ef49b07fe6f62e6dc8de-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1bc0249a6412ef49b07fe6f62e6dc8de-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1bc0249a6412ef49b07fe6f62e6dc8de-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1bc0249a6412ef49b07fe6f62e6dc8de-Reviews.html", "metareview": "", "pdf_size": 456577, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8154425237582783371&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff": "IRMAR, University of Rennes 1; T\u00e9l\u00e9com ParisTech, University of Paris-Saclay", "aff_domain": "univ-rennes1.fr;gmail.com", "email": "univ-rennes1.fr;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1bc0249a6412ef49b07fe6f62e6dc8de-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Rennes 1;T\u00e9l\u00e9com ParisTech", "aff_unique_dep": "IRMAR;", "aff_unique_url": "https://www.univ-rennes1.fr;https://www.telecom-paristech.fr", "aff_unique_abbr": "UR1;Telecom ParisTech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Rennes;Paris", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Attacks Meet Interpretability: Attribute-steered Detection of Adversarial Samples", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11741", "id": "11741", "author_site": "Guanhong Tao, Shiqing Ma, Yingqi Liu, Xiangyu Zhang", "author": "Guanhong Tao; Shiqing Ma; Yingqi Liu; Xiangyu Zhang", "abstract": "Adversarial sample attacks perturb benign inputs to induce DNN misbehaviors. Recent research has demonstrated the widespread presence and the devastating consequences of such attacks. Existing defense techniques either assume prior knowledge of specific attacks or may not work well on complex models due to their underlying assumptions. We argue that adversarial sample attacks are deeply entangled with interpretability of DNN models: while classification results on benign inputs can be reasoned based on the human perceptible features/attributes, results on adversarial samples can hardly be explained. 
Therefore, we propose a novel adversarial sample detection technique for face recognition models, based on interpretability. It features a novel bi-directional correspondence inference between attributes and internal neurons to identify neurons critical for individual attributes. The activation values of critical neurons are enhanced to amplify the reasoning part of the computation and the values of other neurons are weakened to suppress the uninterpretable part. The classification results after such transformation are compared with those of the original model to detect adversaries. Results show that our technique can achieve 94% detection accuracy for 7 different kinds of attacks with 9.91% false positives on benign inputs. In contrast, a state-of-the-art feature squeezing technique can only achieve 55% accuracy with 23.3% false positives.", "bibtex": "@inproceedings{NEURIPS2018_b9946974,\n author = {Tao, Guanhong and Ma, Shiqing and Liu, Yingqi and Zhang, Xiangyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Attacks Meet Interpretability: Attribute-steered Detection of Adversarial Samples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b994697479c5716eda77e8e9713e5f0f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b994697479c5716eda77e8e9713e5f0f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b994697479c5716eda77e8e9713e5f0f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b994697479c5716eda77e8e9713e5f0f-Reviews.html", "metareview": "", "pdf_size": 1934313, "gs_citation": 211, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2985314933504776828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, Purdue University; Department of Computer Science, Purdue University; Department of Computer Science, Purdue University; Department of Computer Science, Purdue University", "aff_domain": "cs.purdue.edu;cs.purdue.edu;cs.purdue.edu;cs.purdue.edu", "email": "cs.purdue.edu;cs.purdue.edu;cs.purdue.edu;cs.purdue.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b994697479c5716eda77e8e9713e5f0f-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Attention in Convolutional LSTM for Gesture Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11207", "id": "11207", "author_site": "Liang Zhang, Guangming Zhu, Lin Mei, Peiyi Shen, Syed Afaq Ali Shah, Mohammed Bennamoun", "author": "Liang Zhang; Guangming Zhu; Lin Mei; Peiyi Shen; Syed Afaq Ali Shah; Mohammed Bennamoun", "abstract": "Convolutional long short-term memory (LSTM) networks have been widely used for action/gesture recognition, and different attention mechanisms have also been embedded into the LSTM or the convolutional LSTM (ConvLSTM) networks. 
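Returning to the attribute-steered detection scheme above ("Attacks Meet Interpretability"): a toy numpy sketch of the strengthen/weaken-and-compare test. The critical-neuron mask here is a random placeholder (the paper derives it from attribute witnesses via bi-directional inference), and an input is flagged when steering flips the prediction:

```python
import numpy as np

def flags_as_adversarial(x, W1, W2, critical, strengthen=2.0, weaken=0.5):
    h = np.maximum(x @ W1, 0.0)                     # hidden activations
    scale = np.where(critical, strengthen, weaken)  # amplify attribute neurons,
    steered = (h * scale) @ W2                      # suppress the rest
    return steered.argmax() != (h @ W2).argmax()    # flag on a label flip

rng = np.random.default_rng(0)
W1, W2 = rng.standard_normal((16, 32)), rng.standard_normal((32, 4))
critical = rng.random(32) < 0.3                     # placeholder critical-neuron mask
x = rng.standard_normal(16)
print(flags_as_adversarial(x, W1, W2, critical))
```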
Based on the previous gesture recognition architectures which combine the three-dimensional convolution neural network (3DCNN) and ConvLSTM, this paper explores the effects of attention mechanism in ConvLSTM. Several variants of ConvLSTM are evaluated: (a) Removing the convolutional structures of the three gates in ConvLSTM, (b) Applying the attention mechanism on the input of ConvLSTM, (c) Reconstructing the input and (d) output gates respectively with the modified channel-wise attention mechanism. The evaluation results demonstrate that the spatial convolutions in the three gates scarcely contribute to the spatiotemporal feature fusion, and the attention mechanisms embedded into the input and output gates cannot improve the feature fusion. In other words, ConvLSTM mainly contributes to the temporal fusion along with the recurrent steps to learn the long-term spatiotemporal features, when taking as input the spatial or spatiotemporal features. On this basis, a new variant of LSTM is derived, in which the convolutional structures are only embedded into the input-to-state transition of LSTM. The code of the LSTM variants is publicly available.", "bibtex": "@inproceedings{NEURIPS2018_287e03db,\n author = {Zhang, Liang and Zhu, Guangming and Mei, Lin and Shen, Peiyi and Shah, Syed Afaq Ali and Bennamoun, Mohammed},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Attention in Convolutional LSTM for Gesture Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/287e03db1d99e0ec2edb90d079e142f3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/287e03db1d99e0ec2edb90d079e142f3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/287e03db1d99e0ec2edb90d079e142f3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/287e03db1d99e0ec2edb90d079e142f3-Reviews.html", "metareview": "", "pdf_size": 9117185, "gs_citation": 188, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13184940893185979866&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Xidian University; Xidian University; Xidian University; Xidian University; Central Queensland University; University of Western Australia", "aff_domain": "xidian.edu.cn;xidian.edu.cn;hotmail.com;xidian.edu.cn;uwa.edu.au;uwa.edu.au", "email": "xidian.edu.cn;xidian.edu.cn;hotmail.com;xidian.edu.cn;uwa.edu.au;uwa.edu.au", "github": "https://github.com/GuangmingZhu/AttentionConvLSTM", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/287e03db1d99e0ec2edb90d079e142f3-Abstract.html", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Xidian University;Central Queensland University;University of Western Australia", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.cqu.edu.au;https://www.uwa.edu.au", "aff_unique_abbr": "Xidian;CQU;UWA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;Australia" }, { "title": "Autoconj: Recognizing and Exploiting Conjugacy Without a Domain-Specific Language", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12013", "id": "12013", "author_site": "Matthew D. 
Hoffman, Matthew Johnson, Dustin Tran", "author": "Matthew D. Hoffman; Matthew J Johnson; Dustin Tran", "abstract": "Deriving conditional and marginal distributions using conjugacy relationships can be time consuming and error prone. In this paper, we propose a strategy for automating such derivations. Unlike previous systems which focus on relationships between pairs of random variables, our system (which we call Autoconj) operates directly on Python functions that compute log-joint distribution functions. Autoconj provides support for conjugacy-exploiting algorithms in any Python-embedded PPL. This paves the way for accelerating development of novel inference algorithms and structure-exploiting modeling strategies. The package can be downloaded at https://github.com/google-research/autoconj.", "bibtex": "@inproceedings{NEURIPS2018_9b89bedd,\n author = {Hoffman, Matthew D. and Johnson, Matthew J and Tran, Dustin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Autoconj: Recognizing and Exploiting Conjugacy Without a Domain-Specific Language},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9b89bedda1fc8a2d88c448e361194f02-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9b89bedda1fc8a2d88c448e361194f02-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9b89bedda1fc8a2d88c448e361194f02-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9b89bedda1fc8a2d88c448e361194f02-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9b89bedda1fc8a2d88c448e361194f02-Reviews.html", "metareview": "", "pdf_size": 510496, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10948786372244458956&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Google AI; Google Brain; Google Brain", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "https://github.com/google-research/autoconj", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9b89bedda1fc8a2d88c448e361194f02-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google AI", "aff_unique_url": "https://ai.google", "aff_unique_abbr": "Google AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Automatic Program Synthesis of Long Programs with a Learned Garbage Collector", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11221", "id": "11221", "author_site": "Amit Zohar, Lior Wolf", "author": "Amit Zohar; Lior Wolf", "abstract": "We consider the problem of generating automatic code given sample input-output pairs. We train a neural network to map from the current state and the outputs to the program's next statement. The neural network optimizes multiple tasks concurrently: the next operation out of a set of high level commands, the operands of the next statement, and which variables can be dropped from memory. 
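A toy sketch of this statement-by-statement synthesis loop with a learned garbage collector. `predict` is a random stand-in for the paper's network, and the two-operation DSL is hypothetical; only the control flow (predict a statement, execute it, drop dead variables, stop on success) mirrors the description above:

```python
import random

OPS = {"add1": lambda v: v + 1, "double": lambda v: 2 * v}   # hypothetical DSL

def predict(state, target):
    """Random stand-in for the learned model: returns the next statement
    (operation, operand index) and the set of variable slots to drop."""
    return random.choice(list(OPS)), random.randrange(len(state)), set()

def synthesize(inp, out, max_len=8):
    state, prog = [inp], []                      # program memory and statements
    for _ in range(max_len):
        op, i, drop = predict(state, out)
        state.append(OPS[op](state[i]))          # execute the predicted statement
        prog.append((op, i))
        state = [v for j, v in enumerate(state) if j not in drop]  # learned GC
        if state[-1] == out:
            return prog
    return None

random.seed(0)
prog = None
while prog is None:                              # restart until a program is found
    prog = synthesize(3, 8)
print(prog)                                      # e.g. [('add1', 0), ('double', 1)]
```

Dropping variables keeps the state small, which is what lets the approach scale to longer programs than the search-based baselines.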
Using our method we are able to create programs that are more than twice as long as existing state-of-the-art solutions, while improving the success rate for comparable lengths, and cutting the run-time by two orders of magnitude. Our code, including an implementation of various literature baselines, is publicly available at https://github.com/amitz25/PCCoder", "bibtex": "@inproceedings{NEURIPS2018_390e9825,\n author = {Zohar, Amit and Wolf, Lior},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Automatic Program Synthesis of Long Programs with a Learned Garbage Collector},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/390e982518a50e280d8e2b535462ec1f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/390e982518a50e280d8e2b535462ec1f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/390e982518a50e280d8e2b535462ec1f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/390e982518a50e280d8e2b535462ec1f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/390e982518a50e280d8e2b535462ec1f-Reviews.html", "metareview": "", "pdf_size": 1164953, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8202429186928135403&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "The School of Computer Science, Tel Aviv University; The School of Computer Science, Tel Aviv University + Facebook AI Research", "aff_domain": "; ", "email": "; ", "github": "https://github.com/amitz25/PCCoder", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/390e982518a50e280d8e2b535462ec1f-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Tel Aviv University;Meta", "aff_unique_dep": "School of Computer Science;Facebook AI Research", "aff_unique_url": "https://www.tau.ac.il;https://research.facebook.com", "aff_unique_abbr": "TAU;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Tel Aviv;", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Israel;United States" }, { "title": "Automatic differentiation in ML: Where we are and where we should be going", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11836", "id": "11836", "author_site": "Bart van Merri\u00ebnboer, Olivier Breuleux, Arnaud Bergeron, Pascal Lamblin", "author": "Bart van Merrienboer; Olivier Breuleux; Arnaud Bergeron; Pascal Lamblin", "abstract": "We review the current state of automatic differentiation (AD) for array programming in machine learning (ML), including the different approaches such as operator overloading (OO) and source transformation (ST) used for AD, graph-based intermediate representations for programs, and source languages. Based on these insights, we introduce a new graph-based intermediate representation (IR) which specifically aims to efficiently support fully-general AD for array programming. Unlike existing dataflow programming representations in ML frameworks, our IR naturally supports function calls, higher-order functions and recursion, making ML models easier to implement. 
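A toy illustration of the closure-based, tape-free style of reverse-mode AD that such an IR enables: each primitive returns its value together with a backward closure, and nesting the closures is itself the adjoint program. This is a generic sketch of the idea, not Myia:

```python
def var(name, value, grads):
    """A leaf: (value, backward-closure) that writes its adjoint into grads."""
    def backward(g):
        grads[name] = grads.get(name, 0.0) + g
    return value, backward

def add(a, b):
    (va, ba), (vb, bb) = a, b
    return va + vb, lambda g: (ba(g), bb(g))

def mul(a, b):
    (va, ba), (vb, bb) = a, b
    return va * vb, lambda g: (ba(g * vb), bb(g * va))

# f(x, y) = x*y + x. Calling the top-level backward closure with seed 1.0
# runs the whole adjoint computation; no global tape object is needed.
grads = {}
x, y = var("x", 2.0, grads), var("y", 3.0, grads)
value, backward = add(mul(x, y), x)
backward(1.0)
print(value, grads)                   # 8.0 {'x': 4.0, 'y': 2.0}
```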
The ability to represent closures allows us to perform AD using ST without a tape, making the resulting derivative (adjoint) program amenable to ahead-of-time optimization using tools from functional language compilers, and enabling higher-order derivatives. Lastly, we introduce a proof of concept compiler toolchain called Myia which uses a subset of Python as a front end.", "bibtex": "@inproceedings{NEURIPS2018_770f8e44,\n author = {van Merrienboer, Bart and Breuleux, Olivier and Bergeron, Arnaud and Lamblin, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Automatic differentiation in ML: Where we are and where we should be going},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/770f8e448d07586afbf77bb59f698587-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/770f8e448d07586afbf77bb59f698587-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/770f8e448d07586afbf77bb59f698587-Metadata.json", "review": "", "metareview": "", "pdf_size": 623270, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11874990560582038809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Mila, Google Brain; Mila; Mila; Mila, Google Brain", "aff_domain": "google.com;iro.umontreal.ca;iro.umontreal.ca;google.com", "email": "google.com;iro.umontreal.ca;iro.umontreal.ca;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/770f8e448d07586afbf77bb59f698587-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Mila", "aff_unique_dep": "", "aff_unique_url": "https://mila.quebec", "aff_unique_abbr": "Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Automating Bayesian optimization with Bayesian optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11581", "id": "11581", "author_site": "Gustavo Malkomes, Roman Garnett", "author": "Gustavo Malkomes; Roman Garnett", "abstract": "Bayesian optimization is a powerful tool for global optimization of expensive functions. One of its key components is the underlying probabilistic model used for the objective function f. In practice, however, it is often unclear how one should appropriately choose a model, especially when gathering data is expensive. In this work, we introduce a novel automated Bayesian optimization approach that dynamically selects promising models for explaining the observed data using Bayesian Optimization in the model space. Crucially, we account for the uncertainty in the choice of model; our method is capable of using multiple models to represent its current belief about f and subsequently using this information for decision making. We argue, and demonstrate empirically, that our approach automatically finds suitable models for the objective function, which ultimately results in more-efficient optimization.", "bibtex": "@inproceedings{NEURIPS2018_2b64c2f1,\n author = {Malkomes, Gustavo and Garnett, Roman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Automating Bayesian optimization with Bayesian optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2b64c2f19d868305aa8bbc2d72902cc5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2b64c2f19d868305aa8bbc2d72902cc5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2b64c2f19d868305aa8bbc2d72902cc5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2b64c2f19d868305aa8bbc2d72902cc5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2b64c2f19d868305aa8bbc2d72902cc5-Reviews.html", "metareview": "", "pdf_size": 476170, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5327934076146314919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science and Engineering, Washington University in St. Louis; Department of Computer Science and Engineering, Washington University in St. Louis", "aff_domain": "wustl.edu;wustl.edu", "email": "wustl.edu;wustl.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2b64c2f19d868305aa8bbc2d72902cc5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WashU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BML: A High-performance, Low-cost Gradient Synchronization Algorithm for DML Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11420", "id": "11420", "author_site": "Songtao Wang, Dan Li, Yang Cheng, Jinkun Geng, Yanshu Wang, Shuai Wang, Shu-Tao Xia, Jianping Wu", "author": "Songtao Wang; Dan Li; Yang Cheng; Jinkun Geng; Yanshu Wang; Shuai Wang; Shu-Tao Xia; Jianping Wu", "abstract": "In distributed machine learning (DML), the network performance between machines significantly impacts the speed of iterative training. In this paper we propose BML, a new gradient synchronization algorithm with higher network performance and lower network cost than the current practice. BML runs on BCube network, instead of using the traditional Fat-Tree topology. BML algorithm is designed in such a way that, compared to the parameter server (PS) algorithm on a Fat-Tree network connecting the same number of server machines, BML achieves theoretically 1/k of the gradient synchronization time, with k/5 of switches (the typical number of k is 2\u223c4). Experiments of LeNet-5 and VGG-19 benchmarks on a testbed with 9 dual-GPU servers show that, BML reduces the job completion time of DML training by up to 56.4%.", "bibtex": "@inproceedings{NEURIPS2018_f410588e,\n author = {Wang, Songtao and Li, Dan and Cheng, Yang and Geng, Jinkun and Wang, Yanshu and Wang, Shuai and Xia, Shu-Tao and Wu, Jianping},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {BML: A High-performance, Low-cost Gradient Synchronization Algorithm for DML Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f410588e48dc83f2822a880a68f78923-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f410588e48dc83f2822a880a68f78923-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f410588e48dc83f2822a880a68f78923-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f410588e48dc83f2822a880a68f78923-Reviews.html", "metareview": "", "pdf_size": 2183274, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15332271029508807610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;;;;;", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f410588e48dc83f2822a880a68f78923-Abstract.html" }, { "title": "BRITS: Bidirectional Recurrent Imputation for Time Series", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11654", "id": "11654", "author_site": "Wei Cao, Dong Wang, Jian Li, Hao Zhou, Lei Li, Yitan Li", "author": "Wei Cao; Dong Wang; Jian Li; Hao Zhou; Lei Li; Yitan Li", "abstract": "Time series are widely used as signals in many classification/regression tasks. It is ubiquitous that time series contains many missing values. Given multiple correlated time series data, how to fill in missing values and to predict their class labels? Existing imputation methods often impose strong assumptions of the underlying data generating process, such as linear dynamics in the state space. \nIn this paper, we propose BRITS, a novel method based on recurrent neural networks for missing value imputation in time series data. Our proposed method directly learns the missing values in a bidirectional recurrent dynamical system, without any specific assumption. The imputed values are treated as variables of RNN graph and can be effectively updated during the backpropagation. BRITS has three advantages: (a) it can handle multiple correlated missing values in time series; (b) it generalizes to time series with nonlinear dynamics underlying; (c) it provides a data-driven imputation procedure and applies to general settings with missing data.\nWe evaluate our model on three real-world datasets, including an air quality dataset, a health-care data, and a localization data for human activity.\nExperiments show that our model outperforms the state-of-the-art methods in both imputation and classification/regression accuracies.", "bibtex": "@inproceedings{NEURIPS2018_734e6bfc,\n author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Lei and Li, Yitan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {BRITS: Bidirectional Recurrent Imputation for Time Series},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/734e6bfcd358e25ac1db0a4241b95651-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/734e6bfcd358e25ac1db0a4241b95651-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/734e6bfcd358e25ac1db0a4241b95651-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/734e6bfcd358e25ac1db0a4241b95651-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/734e6bfcd358e25ac1db0a4241b95651-Reviews.html", "metareview": "", "pdf_size": 386396, "gs_citation": 910, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17928129084181066672&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Tsinghua University+Bytedance AI Lab; Duke University; Tsinghua University; Bytedance AI Lab; Bytedance AI Lab; Bytedance AI Lab", "aff_domain": "tsinghua.org.cn;duke.edu;mail.tsinghua.edu.cn;gmail.com;bytedance.com;bytedance.com", "email": "tsinghua.org.cn;duke.edu;mail.tsinghua.edu.cn;gmail.com;bytedance.com;bytedance.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/734e6bfcd358e25ac1db0a4241b95651-Abstract.html", "aff_unique_index": "0+1;2;0;1;1;1", "aff_unique_norm": "Tsinghua University;ByteDance;Duke University", "aff_unique_dep": ";AI Lab;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.bytedance.com;https://www.duke.edu", "aff_unique_abbr": "THU;Bytedance AI Lab;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "BRUNO: A Deep Recurrent Model for Exchangeable Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11692", "id": "11692", "author_site": "Iryna Korshunova, Jonas Degrave, Ferenc Huszar, Yarin Gal, Arthur Gretton, Joni Dambre", "author": "Iryna Korshunova; Jonas Degrave; Ferenc Huszar; Yarin Gal; Arthur Gretton; Joni Dambre", "abstract": "We present a novel model architecture which leverages deep learning tools to perform exact Bayesian inference on sets of high dimensional, complex observations. Our model is provably exchangeable, meaning that the joint distribution over observations is invariant under permutation: this property lies at the heart of Bayesian inference. The model does not require variational approximations to train, and new samples can be generated conditional on previous samples, with cost linear in the size of the conditioning set. The advantages of our architecture are demonstrated on learning tasks that require generalisation from short observed sequences while modelling sequence variability, such as conditional image generation, few-shot learning, and anomaly detection.", "bibtex": "@inproceedings{NEURIPS2018_1b9f3826,\n author = {Korshunova, Iryna and Degrave, Jonas and Huszar, Ferenc and Gal, Yarin and Gretton, Arthur and Dambre, Joni},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {BRUNO: A Deep Recurrent Model for Exchangeable Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1b9f38268c50805669fd8caf8f3cc84a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1b9f38268c50805669fd8caf8f3cc84a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1b9f38268c50805669fd8caf8f3cc84a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1b9f38268c50805669fd8caf8f3cc84a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1b9f38268c50805669fd8caf8f3cc84a-Reviews.html", "metareview": "", "pdf_size": 343468, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9358687651511071079&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Ghent University; Ghent University + DeepMind; Twitter; University of Oxford; Gatsby Unit, UCL; Ghent University", "aff_domain": "ugent.be;ugent.be;twitter.com;cs.ox.ac.uk;gmail.com;ugent.be", "email": "ugent.be;ugent.be;twitter.com;cs.ox.ac.uk;gmail.com;ugent.be", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1b9f38268c50805669fd8caf8f3cc84a-Abstract.html", "aff_unique_index": "0;0+1;2;3;4;0", "aff_unique_norm": "Ghent University;DeepMind;Twitter, Inc.;University of Oxford;University College London", "aff_unique_dep": ";;;;Gatsby Unit", "aff_unique_url": "https://www.ugent.be/en;https://deepmind.com;https://twitter.com;https://www.ox.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "UGent;DeepMind;Twitter;Oxford;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;2;1;1;0", "aff_country_unique": "Belgium;United Kingdom;United States" }, { "title": "Backpropagation with Callbacks: Foundations for Efficient and Expressive Differentiable Programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11965", "id": "11965", "author_site": "Fei Wang, James Decker, Xilun Wu, Gregory Essertel, Tiark Rompf", "author": "Fei Wang; James Decker; Xilun Wu; Gregory Essertel; Tiark Rompf", "abstract": "Training of deep learning models depends on gradient descent and end-to-end\ndifferentiation. Under the slogan of differentiable programming, there is an\nincreasing demand for efficient automatic gradient computation for emerging\nnetwork architectures that incorporate dynamic control flow, especially in NLP.", "bibtex": "@inproceedings{NEURIPS2018_34e15776,\n author = {Wang, Fei and Decker, James and Wu, Xilun and Essertel, Gregory and Rompf, Tiark},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Backpropagation with Callbacks: Foundations for Efficient and Expressive Differentiable Programming},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/34e157766f31db3d2099831d348a7933-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/34e157766f31db3d2099831d348a7933-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/34e157766f31db3d2099831d348a7933-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/34e157766f31db3d2099831d348a7933-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/34e157766f31db3d2099831d348a7933-Reviews.html", "metareview": "", "pdf_size": 450525, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16881456781595184058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Purdue University; Purdue University; Purdue University; Purdue University; Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/34e157766f31db3d2099831d348a7933-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Balanced Policy Evaluation and Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11849", "id": "11849", "author": "Nathan Kallus", "abstract": "We present a new approach to the problems of evaluating and learning personalized decision policies from observational data of past contexts, decisions, and outcomes. Only the outcome of the enacted decision is available and the historical policy is unknown. These problems arise in personalized medicine using electronic health records and in internet advertising. Existing approaches use inverse propensity weighting (or, doubly robust versions) to make historical outcome (or, residual) data look like it were generated by a new policy being evaluated or learned. But this relies on a plug-in approach that rejects data points with a decision that disagrees with the new policy, leading to high variance estimates and ineffective learning. We propose a new, balance-based approach that too makes the data look like the new policy but does so directly by finding weights that optimize for balance between the weighted data and the target policy in the given, finite sample, which is equivalent to minimizing worst-case or posterior conditional mean square error. Our policy learner proceeds as a two-level optimization problem over policies and weights. We demonstrate that this approach markedly outperforms existing ones both in evaluation and learning, which is unsurprising given the wider support of balance-based weights. We establish extensive theoretical consistency guarantees and regret bounds that support this empirical success.", "bibtex": "@inproceedings{NEURIPS2018_6616758d,\n author = {Kallus, Nathan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Balanced Policy Evaluation and Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6616758da438b02b8d360ad83a5b3d77-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6616758da438b02b8d360ad83a5b3d77-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6616758da438b02b8d360ad83a5b3d77-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6616758da438b02b8d360ad83a5b3d77-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6616758da438b02b8d360ad83a5b3d77-Reviews.html", "metareview": "", "pdf_size": 4212977, "gs_citation": 304, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10974581202350725277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Cornell University and Cornell Tech", "aff_domain": "cornell.edu", "email": "cornell.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6616758da438b02b8d360ad83a5b3d77-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "0", "aff_campus_unique": "Ithaca", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Banach Wasserstein GAN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11652", "id": "11652", "author_site": "Jonas Adler, Sebastian Lunz", "author": "Jonas Adler; Sebastian Lunz", "abstract": "Wasserstein Generative Adversarial Networks (WGANs) can be used to generate realistic samples from complicated image distributions. The Wasserstein metric used in WGANs is based on a notion of distance between individual images, which induces a notion of distance between probability distributions of images. So far the community has considered $\\ell^2$ as the underlying distance. We generalize the theory of WGAN with gradient penalty to Banach spaces, allowing practitioners to select the features to emphasize in the generator. We further discuss the effect of some particular choices of underlying norms, focusing on Sobolev norms. Finally, we demonstrate a boost in performance for an appropriate choice of norm on CIFAR-10 and CelebA.", "bibtex": "@inproceedings{NEURIPS2018_91d0dbfd,\n author = {Adler, Jonas and Lunz, Sebastian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Banach Wasserstein GAN},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/91d0dbfd38d950cb716c4dd26c5da08a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/91d0dbfd38d950cb716c4dd26c5da08a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/91d0dbfd38d950cb716c4dd26c5da08a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/91d0dbfd38d950cb716c4dd26c5da08a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/91d0dbfd38d950cb716c4dd26c5da08a-Reviews.html", "metareview": "", "pdf_size": 2453653, "gs_citation": 397, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10419609167162928003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Mathematics, KTH - Royal Institute of Technology + Elekta; Department of Applied Mathematics and Theoretical Physics, University of Cambridge", "aff_domain": "kth.se;math.cam.ac.uk", "email": "kth.se;math.cam.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/91d0dbfd38d950cb716c4dd26c5da08a-Abstract.html", "aff_unique_index": "0+1;2", "aff_unique_norm": "KTH - Royal Institute of Technology;Elekta;University of Cambridge", "aff_unique_dep": "Department of Mathematics;;Department of Applied Mathematics and Theoretical Physics", "aff_unique_url": "https://www.kth.se;https://www.elekta.com;https://www.cam.ac.uk", "aff_unique_abbr": "KTH;;Cambridge", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0+0;1", "aff_country_unique": "Sweden;United Kingdom" }, { "title": "Bandit Learning in Concave N-Person Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11552", "id": "11552", "author_site": "Mario Bravo, David Leslie, Panayotis Mertikopoulos", "author": "Mario Bravo; David Leslie; Panayotis Mertikopoulos", "abstract": "This paper examines the long-run behavior of learning with bandit feedback in non-cooperative concave games. The bandit framework accounts for extremely low-information environments where the agents may not even know they are playing a game; as such, the agents\u2019 most sensible choice in this setting would be to employ a no-regret learning algorithm. In general, this does not mean that the players' behavior stabilizes in the long run: no-regret learning may lead to cycles, even with perfect gradient information. However, if a standard monotonicity condition is satisfied, our analysis shows that no-regret learning based on mirror descent with bandit feedback converges to Nash equilibrium with probability 1. We also derive an upper bound for the convergence rate of the process that nearly matches the best attainable rate for single-agent bandit stochastic optimization.", "bibtex": "@inproceedings{NEURIPS2018_47fd3c87,\n author = {Bravo, Mario and Leslie, David and Mertikopoulos, Panayotis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bandit Learning in Concave N-Person Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/47fd3c87f42f55d4b233417d49c34783-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/47fd3c87f42f55d4b233417d49c34783-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/47fd3c87f42f55d4b233417d49c34783-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/47fd3c87f42f55d4b233417d49c34783-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/47fd3c87f42f55d4b233417d49c34783-Reviews.html", "metareview": "", "pdf_size": 366016, "gs_citation": 154, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5152447897402134917&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Universidad de Santiago de Chile, Departamento de Matem\u00e1tica y Ciencia de la Computaci\u00f3n; Lancaster University & PROWLER.io; Univ. Grenoble Alpes, CNRS, Inria, Grenoble INP, LIG 38000 Grenoble, France", "aff_domain": "usach.cl;lancaster.ac.uk;imag.fr", "email": "usach.cl;lancaster.ac.uk;imag.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/47fd3c87f42f55d4b233417d49c34783-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Universidad de Santiago de Chile;Lancaster University;Universit\u00e9 Grenoble Alpes", "aff_unique_dep": "Departamento de Matem\u00e1tica y Ciencia de la Computaci\u00f3n;;Laboratoire d'Informatique de Grenoble (LIG)", "aff_unique_url": "https://www.usach.cl;https://www.lancaster.ac.uk;https://www.univ-grenoble-alpes.fr", "aff_unique_abbr": "USACH;Lancaster;UGA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Grenoble", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Chile;United Kingdom;France" }, { "title": "Bandit Learning with Implicit Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11701", "id": "11701", "author_site": "Yi Qi, Qingyun Wu, Hongning Wang, Jie Tang, Maosong Sun", "author": "Yi Qi; Qingyun Wu; Hongning Wang; Jie Tang; Maosong Sun", "abstract": "Implicit feedback, such as user clicks, although abundant in online information service systems, does not provide substantial evidence on users' evaluation of system's output. Without proper modeling, such incomplete supervision inevitably misleads model estimation, especially in a bandit learning setting where the feedback is acquired on the fly. In this work, we perform contextual bandit learning with implicit feedback by modeling the feedback as a composition of user result examination and relevance judgment. Since users' examination behavior is unobserved, we introduce latent variables to model it. We perform Thompson sampling on top of variational Bayesian inference for arm selection and model update. Our upper regret bound analysis of the proposed algorithm proves its feasibility of learning from implicit feedback in a bandit setting; and extensive empirical evaluations on click logs collected from a major MOOC platform further demonstrate its learning effectiveness in practice.", "bibtex": "@inproceedings{NEURIPS2018_d8c9d05e,\n author = {Qi, Yi and Wu, Qingyun and Wang, Hongning and Tang, Jie and Sun, Maosong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bandit Learning with Implicit Feedback},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d8c9d05ec6e86d5bbad7a2f88a1701d0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d8c9d05ec6e86d5bbad7a2f88a1701d0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d8c9d05ec6e86d5bbad7a2f88a1701d0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d8c9d05ec6e86d5bbad7a2f88a1701d0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d8c9d05ec6e86d5bbad7a2f88a1701d0-Reviews.html", "metareview": "", "pdf_size": 585384, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11670456531413289871&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "State Key Lab of Intell. Tech. & Sys., Institution for Artificial Intelligence, Dept. of Comp. Sci. & Tech., Tsinghua University, Beijing, China; Department of Computer Science, University of Virginia; Department of Computer Science, University of Virginia; State Key Lab of Intell. Tech. & Sys., Institution for Artificial Intelligence, Dept. of Comp. Sci. & Tech., Tsinghua University, Beijing, China; State Key Lab of Intell. Tech. & Sys., Institution for Artificial Intelligence, Dept. of Comp. Sci. & Tech., Tsinghua University, Beijing, China", "aff_domain": "mails.tsinghua.edu.cn;virginia.edu;virginia.edu;tsinghua.edu.cn;tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;virginia.edu;virginia.edu;tsinghua.edu.cn;tsinghua.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d8c9d05ec6e86d5bbad7a2f88a1701d0-Abstract.html", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Tsinghua University;University of Virginia", "aff_unique_dep": "Dept. of Comp. Sci. & Tech.;Department of Computer Science", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.virginia.edu", "aff_unique_abbr": "THU;UVA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Bandit Learning with Positive Externalities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11482", "id": "11482", "author_site": "Virag Shah, Jose Blanchet, Ramesh Johari", "author": "Virag Shah; Jose Blanchet; Ramesh Johari", "abstract": "In many platforms, user arrivals exhibit a self-reinforcing behavior: future user arrivals are likely to have preferences similar to users who were satisfied in the past. In other words, arrivals exhibit {\em positive externalities}. We study multiarmed bandit (MAB) problems with positive externalities. We show that the self-reinforcing preferences may lead standard benchmark algorithms such as UCB to exhibit linear regret. We develop a new algorithm, Balanced Exploration (BE), which explores arms carefully to avoid suboptimal convergence of arrivals before sufficient evidence is gathered. We also introduce an adaptive variant of BE which successively eliminates suboptimal arms. 
We analyze their asymptotic regret, and establish optimality by showing that no algorithm can perform better.", "bibtex": "@inproceedings{NEURIPS2018_ea159dc9,\n author = {Shah, Virag and Blanchet, Jose and Johari, Ramesh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bandit Learning with Positive Externalities},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ea159dc9788ffac311592613b7f71fbb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ea159dc9788ffac311592613b7f71fbb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ea159dc9788ffac311592613b7f71fbb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ea159dc9788ffac311592613b7f71fbb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ea159dc9788ffac311592613b7f71fbb-Reviews.html", "metareview": "", "pdf_size": 469663, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6298225999916396855&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Management Science and Engineering, Stanford University, California, USA 94305; Management Science and Engineering, Stanford University, California, USA 94305; Management Science and Engineering, Stanford University, California, USA 94305", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ea159dc9788ffac311592613b7f71fbb-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Management Science and Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "California", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Batch-Instance Normalization for Adaptively Style-Invariant Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11264", "id": "11264", "author_site": "Hyeonseob Nam, Hyo-Eun Kim", "author": "Hyeonseob Nam; Hyo-Eun Kim", "abstract": "Real-world image recognition is often challenged by the variability of visual styles including object textures, lighting conditions, filter effects, etc. Although these variations have been deemed to be implicitly handled by more training data and deeper networks, recent advances in image style transfer suggest that it is also possible to explicitly manipulate the style information. Extending this idea to general visual recognition problems, we present Batch-Instance Normalization (BIN) to explicitly normalize unnecessary styles from images. Considering certain style features play an essential role in discriminative tasks, BIN learns to selectively normalize only disturbing styles while preserving useful styles. The proposed normalization module is easily incorporated into existing network architectures such as Residual Networks, and surprisingly improves the recognition performance in various scenarios. 
Furthermore, experiments verify that BIN effectively adapts to completely different tasks like object classification and style transfer, by controlling the trade-off between preserving and removing style variations. BIN can be implemented with only a few lines of code using popular deep learning frameworks.", "bibtex": "@inproceedings{NEURIPS2018_018b59ce,\n author = {Nam, Hyeonseob and Kim, Hyo-Eun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Batch-Instance Normalization for Adaptively Style-Invariant Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/018b59ce1fd616d874afad0f44ba338d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/018b59ce1fd616d874afad0f44ba338d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/018b59ce1fd616d874afad0f44ba338d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/018b59ce1fd616d874afad0f44ba338d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/018b59ce1fd616d874afad0f44ba338d-Reviews.html", "metareview": "", "pdf_size": 1731902, "gs_citation": 284, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10695085476541761892&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Lunit Inc. Seoul, South Korea; Lunit Inc. Seoul, South Korea", "aff_domain": "lunit.io;lunit.io", "email": "lunit.io;lunit.io", "github": "https://github.com/hyeonseob-nam/Batch-Instance-Normalization", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/018b59ce1fd616d874afad0f44ba338d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Lunit Inc.", "aff_unique_dep": "", "aff_unique_url": "https://www.lunit.io", "aff_unique_abbr": "Lunit", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Bayesian Adversarial Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11664", "id": "11664", "author_site": "Nanyang Ye, Zhanxing Zhu", "author": "Nanyang Ye; Zhanxing Zhu", "abstract": "Deep neural networks have been known to be vulnerable to adversarial attacks, raising lots of security concerns in the practical deployment. Popular defensive approaches can be formulated as a (distributionally) robust optimization problem, which minimizes a ``point estimate'' of worst-case loss derived from either per-datum perturbation or adversary data-generating distribution within certain pre-defined constraints. This point estimate ignores potential test adversaries that are beyond the pre-defined constraints. The model robustness might deteriorate sharply in the scenario of stronger test adversarial data. In this work, a novel robust training framework is proposed to alleviate this issue, Bayesian Robust Learning, in which a distribution is put on the adversarial data-generating distribution to account for the uncertainty of the adversarial data-generating process. The uncertainty directly helps to consider the potential adversaries that are stronger than the point estimate in the cases of distributionally robust optimization. The uncertainty of model parameters is also incorporated to accommodate the full Bayesian framework. 
We design a scalable Markov Chain Monte Carlo sampling strategy to obtain the posterior distribution over model parameters. Various experiments are conducted to verify the superiority of BAL over existing adversarial training methods. The code for BAL is available at \\url{https://tinyurl.com/ycxsaewr}.", "bibtex": "@inproceedings{NEURIPS2018_586f9b40,\n author = {Ye, Nanyang and Zhu, Zhanxing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Adversarial Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/586f9b4035e5997f77635b13cc04984c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/586f9b4035e5997f77635b13cc04984c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/586f9b4035e5997f77635b13cc04984c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/586f9b4035e5997f77635b13cc04984c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/586f9b4035e5997f77635b13cc04984c-Reviews.html", "metareview": "", "pdf_size": 2993681, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16905805844691036826&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/586f9b4035e5997f77635b13cc04984c-Abstract.html" }, { "title": "Bayesian Alignments of Warped Multi-Output Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11674", "id": "11674", "author_site": "Markus Kaiser, Clemens Otte, Thomas Runkler, Carl Henrik Ek", "author": "Markus Kaiser; Clemens Otte; Thomas Runkler; Carl Henrik Ek", "abstract": "We propose a novel Bayesian approach to modelling nonlinear alignments of time series based on latent shared information. We apply the method to the real-world problem of finding common structure in the sensor data of wind turbines introduced by the underlying latent and turbulent wind field. The proposed model allows for both arbitrary alignments of the inputs and non-parametric output warpings to transform the observations. This gives rise to multiple deep Gaussian process models connected via latent generating processes. We present an efficient variational approximation based on nested variational compression and show how the model can be used to extract shared information between dependent time series, recovering an interpretable functional decomposition of the learning problem. We show results for an artificial data set and real-world data of two wind turbines.", "bibtex": "@inproceedings{NEURIPS2018_2974788b,\n author = {Kaiser, Markus and Otte, Clemens and Runkler, Thomas and Ek, Carl Henrik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Alignments of Warped Multi-Output Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2974788b53f73e7950e8aa49f3a306db-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2974788b53f73e7950e8aa49f3a306db-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2974788b53f73e7950e8aa49f3a306db-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2974788b53f73e7950e8aa49f3a306db-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2974788b53f73e7950e8aa49f3a306db-Reviews.html", "metareview": "", "pdf_size": 2242157, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10407471665913263717&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Siemens AG + Technical University of Munich; Siemens AG; Siemens AG + Technical University of Munich; University of Bristol", "aff_domain": "siemens.com;siemens.com;siemens.com;bristol.ac.uk", "email": "siemens.com;siemens.com;siemens.com;bristol.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2974788b53f73e7950e8aa49f3a306db-Abstract.html", "aff_unique_index": "0+1;0;0+1;2", "aff_unique_norm": "Siemens AG;Technical University of Munich;University of Bristol", "aff_unique_dep": ";;", "aff_unique_url": "https://www.siemens.com;https://www.tum.de;https://www.bristol.ac.uk", "aff_unique_abbr": "Siemens;TUM;Bristol", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0+0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Bayesian Control of Large MDPs with Unknown Dynamics in Data-Poor Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11780", "id": "11780", "author_site": "Mahdi Imani, Seyede Fatemeh Ghoreishi, Ulisses M. Braga-Neto", "author": "Mahdi Imani; Seyede Fatemeh Ghoreishi; Ulisses M. Braga-Neto", "abstract": "We propose a Bayesian decision making framework for control of Markov Decision Processes (MDPs) with unknown dynamics and large, possibly continuous, state, action, and parameter spaces in data-poor environments. Most of the existing adaptive controllers for MDPs with unknown dynamics are based on the reinforcement learning framework and rely on large data sets acquired by sustained direct interaction with the system or via a simulator. This is not feasible in many applications, due to ethical, economic, and physical constraints. The proposed framework addresses the data poverty issue by decomposing the problem into an offline planning stage that does not rely on sustained direct interaction with the system or simulator and an online execution stage. In the offline process, parallel Gaussian process temporal difference (GPTD) learning techniques are employed for near-optimal Bayesian approximation of the expected discounted reward over a sample drawn from the prior distribution of unknown parameters. In the online stage, the action with the maximum expected return with respect to the posterior distribution of the parameters is selected. 
This is achieved by an approximation of the posterior distribution using a Markov Chain Monte Carlo (MCMC) algorithm, followed by constructing multiple Gaussian processes over the parameter space for efficient prediction of the means of the expected return at the MCMC sample. The effectiveness of the proposed framework is demonstrated using a simple dynamical system model with continuous state and action spaces, as well as a more complex model for a metastatic melanoma gene regulatory network observed through noisy synthetic gene expression data.", "bibtex": "@inproceedings{NEURIPS2018_2cbd9c54,\n author = {Imani, Mahdi and Ghoreishi, Seyede Fatemeh and Braga-Neto, Ulisses M.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Control of Large MDPs with Unknown Dynamics in Data-Poor Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2cbd9c540641923027adb8ab89decc05-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2cbd9c540641923027adb8ab89decc05-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2cbd9c540641923027adb8ab89decc05-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2cbd9c540641923027adb8ab89decc05-Reviews.html", "metareview": "", "pdf_size": 2226233, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13217744177618443567&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Texas A&M University; Texas A&M University; Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;ece.tamu.edu", "email": "tamu.edu;tamu.edu;ece.tamu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2cbd9c540641923027adb8ab89decc05-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Distributed Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11617", "id": "11617", "author_site": "Michael Teng, Frank Wood", "author": "Michael Teng; Frank Wood", "abstract": "We introduce Bayesian distributed stochastic gradient descent (BDSGD), a high-throughput algorithm for training deep neural networks on parallel clusters. This algorithm uses amortized inference in a deep generative model to perform joint posterior predictive inference of mini-batch gradient computation times in a compute cluster specific manner. Specifically, our algorithm mitigates the straggler effect in synchronous, gradient-based optimization by choosing an optimal cutoff beyond which mini-batch gradient messages from slow workers are ignored. In our experiments, we show that eagerly discarding the mini-batch gradient computations of stragglers not only increases throughput but actually increases the overall rate of convergence as a function of wall-clock time by virtue of eliminating idleness. 
The principal novel contribution and finding of this work goes beyond this by demonstrating that using the predicted run-times from a generative model of cluster worker performance improves substantially over the static-cutoff prior art, leading to reduced deep neural net training times on large computer clusters.", "bibtex": "@inproceedings{NEURIPS2018_86b20716,\n author = {Teng, Michael and Wood, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Distributed Stochastic Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/86b20716fbd5b253d27cec43127089bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/86b20716fbd5b253d27cec43127089bc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/86b20716fbd5b253d27cec43127089bc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/86b20716fbd5b253d27cec43127089bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/86b20716fbd5b253d27cec43127089bc-Reviews.html", "metareview": "", "pdf_size": 3109658, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9009424179763257771&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Department of Engineering Sciences, University of Oxford; Department of Computer Science, University of British Columbia", "aff_domain": "robots.ox.ac.uk;cs.ubc.ca", "email": "robots.ox.ac.uk;cs.ubc.ca", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/86b20716fbd5b253d27cec43127089bc-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Oxford;University of British Columbia", "aff_unique_dep": "Department of Engineering Sciences;Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Oxford;UBC", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Oxford;Vancouver", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Bayesian Inference of Temporal Task Specifications from Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11379", "id": "11379", "author_site": "Ankit Shah, Pritish Kamath, Julie A Shah, Shen Li", "author": "Ankit Shah; Pritish Kamath; Julie A Shah; Shen Li", "abstract": "When observing task demonstrations, human apprentices are able to identify whether a given task is executed correctly long before they gain expertise in actually performing that task. Prior research into learning from demonstrations (LfD) has failed to capture this notion of the acceptability of an execution; meanwhile, temporal logics provide a flexible language for expressing task specifications. Inspired by this, we present Bayesian specification inference, a probabilistic model for inferring task specification as a temporal logic formula. We incorporate methods from probabilistic programming to define our priors, along with a domain-independent likelihood function to enable sampling-based inference. 
We demonstrate the efficacy of our model for inferring true specifications with over 90% similarity between the inferred specification and the ground truth, both within a synthetic domain and a real-world table setting task.", "bibtex": "@inproceedings{NEURIPS2018_13168e6a,\n author = {Shah, Ankit and Kamath, Pritish and Shah, Julie A and Li, Shen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Inference of Temporal Task Specifications from Demonstrations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/13168e6a2e6c84b4b7de9390c0ef5ec5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/13168e6a2e6c84b4b7de9390c0ef5ec5-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/13168e6a2e6c84b4b7de9390c0ef5ec5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/13168e6a2e6c84b4b7de9390c0ef5ec5-Reviews.html", "metareview": "", "pdf_size": 6581209, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9222228125713861399&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "CSAIL, MIT; CSAIL, MIT; CSAIL, MIT; CSAIL, MIT", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/13168e6a2e6c84b4b7de9390c0ef5ec5-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Model Selection Approach to Boundary Detection with Non-Local Priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11210", "id": "11210", "author_site": "Fei Jiang, Guosheng Yin, Francesca Dominici", "author": "Fei Jiang; Guosheng Yin; Francesca Dominici", "abstract": "Based on non-local prior distributions, we propose a Bayesian model selection (BMS) procedure for boundary detection in a sequence of data with multiple systematic mean changes. The BMS method can effectively suppress the non-boundary spike points with large instantaneous changes. We speed up the algorithm by reducing the multiple change points to a series of single change point detection problems. We establish the consistency of the estimated number and locations of the change points under various prior distributions. Extensive simulation studies are conducted to compare the BMS with existing methods, and our approach is illustrated with application to the magnetic resonance imaging guided radiation therapy data.", "bibtex": "@inproceedings{NEURIPS2018_7b13b220,\n author = {Jiang, Fei and Yin, Guosheng and Dominici, Francesca},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Model Selection Approach to Boundary Detection with Non-Local Priors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7b13b2203029ed80337f27127a9f1d28-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7b13b2203029ed80337f27127a9f1d28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7b13b2203029ed80337f27127a9f1d28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7b13b2203029ed80337f27127a9f1d28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7b13b2203029ed80337f27127a9f1d28-Reviews.html", "metareview": "", "pdf_size": 2194782, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10938144870394267579&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Statistics and Actuarial Science, The University of Hong Kong; Department of Statistics and Actuarial Science, The University of Hong Kong; Harvard T.H. Chan School of Public Health, Harvard University", "aff_domain": "hku.hk;hku.hk;hsph.harvard.edu", "email": "hku.hk;hku.hk;hsph.harvard.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7b13b2203029ed80337f27127a9f1d28-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Hong Kong;Harvard University", "aff_unique_dep": "Department of Statistics and Actuarial Science;T.H. Chan School of Public Health", "aff_unique_url": "https://www.hku.hk;https://www.harvard.edu", "aff_unique_abbr": "HKU;Harvard", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Hong Kong SAR;Cambridge", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "Bayesian Model-Agnostic Meta-Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11706", "id": "11706", "author_site": "Jaesik Yoon, Taesup Kim, Ousmane Dia, Sungwoong Kim, Yoshua Bengio, Sungjin Ahn", "author": "Jaesik Yoon; Taesup Kim; Ousmane Dia; Sungwoong Kim; Yoshua Bengio; Sungjin Ahn", "abstract": "Due to the inherent model uncertainty, learning to infer Bayesian posterior from a few-shot dataset is an important step towards robust meta-learning. In this paper, we propose a novel Bayesian model-agnostic meta-learning method. The proposed method combines efficient gradient-based meta-learning with nonparametric variational inference in a principled probabilistic framework. Unlike previous methods, during fast adaptation, the method is capable of learning complex uncertainty structure beyond a simple Gaussian approximation, and during meta-update, a novel Bayesian mechanism prevents meta-level overfitting. Remaining a gradient-based method, it is also the first Bayesian model-agnostic meta-learning method applicable to various tasks including reinforcement learning. Experiment results show the accuracy and robustness of the proposed method in sinusoidal regression, image classification, active learning, and reinforcement learning.", "bibtex": "@inproceedings{NEURIPS2018_e1021d43,\n author = {Yoon, Jaesik and Kim, Taesup and Dia, Ousmane and Kim, Sungwoong and Bengio, Yoshua and Ahn, Sungjin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Model-Agnostic Meta-Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e1021d43911ca2c1845910d84f40aeae-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e1021d43911ca2c1845910d84f40aeae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e1021d43911ca2c1845910d84f40aeae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e1021d43911ca2c1845910d84f40aeae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e1021d43911ca2c1845910d84f40aeae-Reviews.html", "metareview": "", "pdf_size": 3023547, "gs_citation": 536, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7370333111335795917&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "SAP; MILA Universit\u00e9 de Montr\u00e9al + Element AI; Element AI; Kakao Brain; MILA Universit\u00e9 de Montr\u00e9al + CIFAR Senior Fellow; Rutgers University + Element AI", "aff_domain": "sap.com;mila.quebec;elementai.com;kakaobrain.com;mila.quebec;rutgers.edu", "email": "sap.com;mila.quebec;elementai.com;kakaobrain.com;mila.quebec;rutgers.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e1021d43911ca2c1845910d84f40aeae-Abstract.html", "aff_unique_index": "0;1+2;2;3;1+4;5+2", "aff_unique_norm": "SAP SE;Universit\u00e9 de Montr\u00e9al;Element AI;Kakao Brain;CIFAR;Rutgers University", "aff_unique_dep": ";MILA;;;Senior Fellow;", "aff_unique_url": "https://www.sap.com;https://www.umontreal.ca;https://www.elementai.com;https://brain.kakao.com;https://www.cifar.ca;https://www.rutgers.edu", "aff_unique_abbr": "SAP;UdeM;Element AI;Kakao Brain;CIFAR;Rutgers", "aff_campus_unique_index": "1;1;", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;1+1;1;2;1+1;3+1", "aff_country_unique": "Germany;Canada;South Korea;United States" }, { "title": "Bayesian Nonparametric Spectral Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11960", "id": "11960", "author": "Felipe Tobar", "abstract": "Spectral estimation (SE) aims to identify how the energy of a signal (e.g., a time series) is distributed across different frequencies. This can become particularly challenging when only partial and noisy observations of the signal are available, where current methods fail to handle uncertainty appropriately. In this context, we propose a joint probabilistic model for signals, observations and spectra, where SE is addressed as an inference problem. Assuming a Gaussian process prior over the signal, we apply Bayes' rule to find the analytic posterior distribution of the spectrum given a set of observations. Besides its expressiveness and natural account of spectral uncertainty, the proposed model also provides a functional-form representation of the power spectral density, which can be optimised efficiently. Comparison with previous approaches is addressed theoretically, showing that the proposed method is an infinite-dimensional variant of the Lomb-Scargle approach, and also empirically through three experiments.", "bibtex": "@inproceedings{NEURIPS2018_abd1c782,\n author = {Tobar, Felipe},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Nonparametric Spectral Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/abd1c782880cc59759f4112fda0b8f98-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/abd1c782880cc59759f4112fda0b8f98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/abd1c782880cc59759f4112fda0b8f98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/abd1c782880cc59759f4112fda0b8f98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/abd1c782880cc59759f4112fda0b8f98-Reviews.html", "metareview": "", "pdf_size": 1286559, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17785517224633397163&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Universidad de Chile", "aff_domain": "dim.uchile.cl", "email": "dim.uchile.cl", "github": "github.com/GAMES-UChile", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/abd1c782880cc59759f4112fda0b8f98-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Universidad de Chile", "aff_unique_dep": "", "aff_unique_url": "https://www.uchile.cl", "aff_unique_abbr": "UCH", "aff_country_unique_index": "0", "aff_country_unique": "Chile" }, { "title": "Bayesian Pose Graph Optimization via Bingham Distributions and Tempered Geodesic MCMC", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11056", "id": "11056", "author_site": "Tolga Birdal, Umut Simsekli, Mustafa Onur Eken, Slobodan Ilic", "author": "Tolga Birdal; Umut Simsekli; Mustafa Onur Eken; Slobodan Ilic", "abstract": "We introduce Tempered Geodesic Markov Chain Monte Carlo (TG-MCMC) algorithm for initializing pose graph optimization problems, arising in various scenarios such as SFM (structure from motion) or SLAM (simultaneous localization and mapping). TG-MCMC is first of its kind as it unites global non-convex optimization on the spherical manifold of quaternions with posterior sampling, in order to provide both reliable initial poses and uncertainty estimates that are informative about the quality of solutions. We devise theoretical convergence guarantees and extensively evaluate our method on synthetic and real benchmarks. Besides its elegance in formulation and theory, we show that our method is robust to missing data, noise and the estimated uncertainties capture intuitive properties of the data.", "bibtex": "@inproceedings{NEURIPS2018_58a2fc6e,\n author = {Birdal, Tolga and Simsekli, Umut and Eken, Mustafa Onur and Ilic, Slobodan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Pose Graph Optimization via Bingham Distributions and Tempered Geodesic MCMC},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/58a2fc6ed39fd083f55d4182bf88826d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/58a2fc6ed39fd083f55d4182bf88826d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/58a2fc6ed39fd083f55d4182bf88826d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/58a2fc6ed39fd083f55d4182bf88826d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/58a2fc6ed39fd083f55d4182bf88826d-Reviews.html", "metareview": "", "pdf_size": 2375415, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14752823099526322185&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/58a2fc6ed39fd083f55d4182bf88826d-Abstract.html" }, { "title": "Bayesian Semi-supervised Learning with Graph Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11182", "id": "11182", "author_site": "Yin Cheng Ng, Nicol\u00f2 Colombo, Ricardo Silva", "author": "Yin Cheng Ng; Nicol\u00f2 Colombo; Ricardo Silva", "abstract": "We propose a data-efficient Gaussian process-based Bayesian approach to the semi-supervised learning problem on graphs. The proposed model shows extremely competitive performance when compared to the state-of-the-art graph neural networks on semi-supervised learning benchmark experiments, and outperforms the neural networks in active learning experiments where labels are scarce. Furthermore, the model does not require a validation data set for early stopping to control over-fitting. Our model can be viewed as an instance of empirical distribution regression weighted locally by network connectivity. We further motivate the intuitive construction of the model with a Bayesian linear model interpretation where the node features are filtered by an operator related to the graph Laplacian. The method can be easily implemented by adapting off-the-shelf scalable variational inference algorithms for Gaussian processes.", "bibtex": "@inproceedings{NEURIPS2018_1fc21400,\n author = {Ng, Yin Cheng and Colombo, Nicol\\`{o} and Silva, Ricardo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Semi-supervised Learning with Graph Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1fc214004c9481e4c8073e85323bfd4b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1fc214004c9481e4c8073e85323bfd4b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1fc214004c9481e4c8073e85323bfd4b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1fc214004c9481e4c8073e85323bfd4b-Reviews.html", "metareview": "", "pdf_size": 675088, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10102495572315452574&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Statistical Science, University College London; Statistical Science, University College London; Statistical Science, University College London + The Alan Turing Institute", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1fc214004c9481e4c8073e85323bfd4b-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "University College London;Alan Turing Institute", "aff_unique_dep": "Statistical Science;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "UCL;ATI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Bayesian Structure Learning by Recursive Bootstrap", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11995", "id": "11995", "author_site": "Raanan Y. Rohekar, Yaniv Gurwicz, Shami Nisimov, Guy Koren, Gal Novik", "author": "Raanan Y. Rohekar; Yaniv Gurwicz; Shami Nisimov; Guy Koren; Gal Novik", "abstract": "We address the problem of Bayesian structure learning for domains with hundreds of variables by employing non-parametric bootstrap, recursively. We propose a method that covers both model averaging and model selection in the same framework. The proposed method deals with the main weakness of constraint-based learning---sensitivity to errors in the independence tests---by a novel way of combining bootstrap with constraint-based learning. Essentially, we provide an algorithm for learning a tree, in which each node represents a scored CPDAG for a subset of variables and the level of the node corresponds to the maximal order of conditional independencies that are encoded in the graph. As higher order independencies are tested in deeper recursive calls, they benefit from more bootstrap samples, and therefore are more resistant to the curse-of-dimensionality. Moreover, the re-use of stable low order independencies allows greater computational efficiency. We also provide an algorithm for sampling CPDAGs efficiently from their posterior given the learned tree. That is, not from the full posterior, but from a reduced space of CPDAGs encoded in the learned tree. We empirically demonstrate that the proposed algorithm scales well to hundreds of variables, and learns better MAP models and more reliable causal relationships between variables, than other state-of-the-art-methods.", "bibtex": "@inproceedings{NEURIPS2018_11e2ad6b,\n author = {Rohekar, Raanan Y. 
and Gurwicz, Yaniv and Nisimov, Shami and Koren, Guy and Novik, Gal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Structure Learning by Recursive Bootstrap},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/11e2ad6bf99300cd3808bb105b55d4b8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/11e2ad6bf99300cd3808bb105b55d4b8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/11e2ad6bf99300cd3808bb105b55d4b8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/11e2ad6bf99300cd3808bb105b55d4b8-Reviews.html", "metareview": "", "pdf_size": 629016, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8741496663210631585&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Intel AI Lab; Intel AI Lab; Intel AI Lab; Intel AI Lab; Intel AI Lab", "aff_domain": "intel.com;intel.com;intel.com;intel.com;intel.com", "email": "intel.com;intel.com;intel.com;intel.com;intel.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/11e2ad6bf99300cd3808bb105b55d4b8-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel AI Lab", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian multi-domain learning for cancer subtype discovery from next-generation sequencing count data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11869", "id": "11869", "author_site": "Ehsan Hajiramezanali, Siamak Zamani Dadaneh, Alireza Karbalayghareh, Mingyuan Zhou, Xiaoning Qian", "author": "Ehsan Hajiramezanali; Siamak Zamani Dadaneh; Alireza Karbalayghareh; Mingyuan Zhou; Xiaoning Qian", "abstract": "Precision medicine aims for personalized prognosis and therapeutics by utilizing recent genome-scale high-throughput profiling techniques, including next-generation sequencing (NGS). However, translating NGS data faces several challenges. First, NGS count data are often overdispersed, requiring appropriate modeling. Second, compared to the number of involved molecules and system complexity, the number of available samples for studying complex disease, such as cancer, is often limited, especially considering disease heterogeneity. The key question is whether we may integrate available data from all different sources or domains to achieve reproducible disease prognosis based on NGS count data. In this paper, we develop a Bayesian Multi-Domain Learning (BMDL) model that derives domain-dependent latent representations of overdispersed count data based on hierarchical negative binomial factorization for accurate cancer subtyping even if the number of samples for a specific cancer type is small. 
Experimental results from both our simulated and NGS datasets from The Cancer Genome Atlas (TCGA) demonstrate the promising potential of BMDL for effective multi-domain learning without ``negative transfer'' effects often seen in existing multi-task learning and transfer learning methods.", "bibtex": "@inproceedings{NEURIPS2018_28a543c2,\n author = {Hajiramezanali, Ehsan and Zamani Dadaneh, Siamak and Karbalayghareh, Alireza and Zhou, Mingyuan and Qian, Xiaoning},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian multi-domain learning for cancer subtype discovery from next-generation sequencing count data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/28a543c2a9eee8c0d6fbfaff7ca7e224-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/28a543c2a9eee8c0d6fbfaff7ca7e224-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/28a543c2a9eee8c0d6fbfaff7ca7e224-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/28a543c2a9eee8c0d6fbfaff7ca7e224-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/28a543c2a9eee8c0d6fbfaff7ca7e224-Reviews.html", "metareview": "", "pdf_size": 294886, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18365027097498502759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Texas A&M University; Texas A&M University; Texas A&M University; University of Texas at Austin; Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu;mccombs.utexas.edu;ece.tamu.edu", "email": "tamu.edu;tamu.edu;tamu.edu;mccombs.utexas.edu;ece.tamu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/28a543c2a9eee8c0d6fbfaff7ca7e224-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Texas A&M University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.utexas.edu", "aff_unique_abbr": "TAMU;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Beauty-in-averageness and its contextual modulations: A Bayesian statistical account", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11405", "id": "11405", "author_site": "Chaitanya Ryali, Angela Yu", "author": "Chaitanya Ryali; Angela J. Yu", "abstract": "Understanding how humans perceive the likability of high-dimensional", "bibtex": "@inproceedings{NEURIPS2018_84ddfb34,\n author = {Ryali, Chaitanya and Yu, Angela J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Beauty-in-averageness and its contextual modulations: A Bayesian statistical account},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/84ddfb34126fc3a48ee38d7044e87276-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/84ddfb34126fc3a48ee38d7044e87276-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/84ddfb34126fc3a48ee38d7044e87276-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/84ddfb34126fc3a48ee38d7044e87276-Reviews.html", "metareview": "", "pdf_size": 1549986, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4036768684830535501&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science and Engineering, University of California San Diego; Department of Cognitive Science, University of California San Diego", "aff_domain": "eng.ucsd.edu;ucsd.edu", "email": "eng.ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/84ddfb34126fc3a48ee38d7044e87276-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Benefits of over-parameterization with EM", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12008", "id": "12008", "author_site": "Ji Xu, Daniel Hsu, Arian Maleki", "author": "Ji Xu; Daniel J. Hsu; Arian Maleki", "abstract": "Expectation Maximization (EM) is among the most popular algorithms for maximum likelihood estimation, but it is generally only guaranteed to find stationary points of the log-likelihood objective. The goal of this article is to present theoretical and empirical evidence that over-parameterization can help EM avoid spurious local optima in the log-likelihood. We consider the problem of estimating the mean vectors of a Gaussian mixture model in a scenario where the mixing weights are known. Our study shows that the global behavior of EM, when one uses an over-parameterized model in which the mixing weights are treated as unknown, is better than that when one uses the (correct) model with the mixing weights fixed to the known values. For symmetric Gaussian mixtures with two components, we prove that introducing the (statistically redundant) weight parameters enables EM to find the global maximizer of the log-likelihood starting from almost any initial mean parameters, whereas EM without this over-parameterization may very often fail. For other Gaussian mixtures, we provide empirical evidence that shows similar behavior. Our results corroborate the value of over-parameterization in solving non-convex optimization problems, previously observed in other domains.", "bibtex": "@inproceedings{NEURIPS2018_86ba98bc,\n author = {Xu, Ji and Hsu, Daniel J and Maleki, Arian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Benefits of over-parameterization with EM},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/86ba98bcbd3466d253841907ba1fc725-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/86ba98bcbd3466d253841907ba1fc725-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/86ba98bcbd3466d253841907ba1fc725-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/86ba98bcbd3466d253841907ba1fc725-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/86ba98bcbd3466d253841907ba1fc725-Reviews.html", "metareview": "", "pdf_size": 1279384, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17082846635525763886&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Columbia University; Columbia University; Columbia University", "aff_domain": "cs.columbia.edu;cs.columbia.edu;stat.columbia.edu", "email": "cs.columbia.edu;cs.columbia.edu;stat.columbia.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/86ba98bcbd3466d253841907ba1fc725-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Beyond Grids: Learning Graph Representations for Visual Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11879", "id": "11879", "author_site": "Yin Li, Abhinav Gupta", "author": "Yin Li; Abhinav Gupta", "abstract": "We propose learning graph representations from 2D feature maps for visual recognition. Our method draws inspiration from region based recognition, and learns to transform a 2D image into a graph structure. The vertices of the graph define clusters of pixels (\"regions\"), and the edges measure the similarity between these clusters in a feature space. Our method further learns to propagate information across all vertices on the graph, and is able to project the learned graph representation back into 2D grids. Our graph representation facilitates reasoning beyond regular grids and can capture long range dependencies among regions. We demonstrate that our model can be trained from end-to-end, and is easily integrated into existing networks. Finally, we evaluate our method on three challenging recognition tasks: semantic segmentation, object detection and object instance segmentation. For all tasks, our method outperforms state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_4efb80f6,\n author = {Li, Yin and Gupta, Abhinav},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Beyond Grids: Learning Graph Representations for Visual Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4efb80f630ccecb2d3b9b2087b0f9c89-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4efb80f630ccecb2d3b9b2087b0f9c89-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4efb80f630ccecb2d3b9b2087b0f9c89-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4efb80f630ccecb2d3b9b2087b0f9c89-Reviews.html", "metareview": "", "pdf_size": 24960048, "gs_citation": 246, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8327623024880108029&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff": "Department of Biostatistics & Medical Informatics+Department of Computer Sciences, University of Wisconsin\u2013Madison; The Robotics Institute, School of Computer Science, Carnegie Mellon University", "aff_domain": "wisc.edu;cs.cmu.edu", "email": "wisc.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4efb80f630ccecb2d3b9b2087b0f9c89-Abstract.html", "aff_unique_index": "0+1;2", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin\u2013Madison;Carnegie Mellon University", "aff_unique_dep": "Department of Biostatistics & Medical Informatics;Department of Computer Sciences;School of Computer Science", "aff_unique_url": "https://biostat.wisc.edu;https://www.wisc.edu;https://www.cmu.edu", "aff_unique_abbr": ";UW\u2013Madison;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "Beyond Log-concavity: Provable Guarantees for Sampling Multi-modal Distributions using Simulated Tempering Langevin Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11753", "id": "11753", "author_site": "Holden Lee, Andrej Risteski, Rong Ge", "author": "Holden Lee; Andrej Risteski; Rong Ge", "abstract": "A key task in Bayesian machine learning is sampling from distributions that are only specified up to a partition function (i.e., constant of proportionality). One prevalent example of this is sampling posteriors in parametric \ndistributions, such as latent-variable generative models. However sampling (even very approximately) can be #P-hard.", "bibtex": "@inproceedings{NEURIPS2018_c6ede20e,\n author = {Lee, Holden and Risteski, Andrej and Ge, Rong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Beyond Log-concavity: Provable Guarantees for Sampling Multi-modal Distributions using Simulated Tempering Langevin Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c6ede20e6f597abf4b3f6bb30cee16c7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c6ede20e6f597abf4b3f6bb30cee16c7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c6ede20e6f597abf4b3f6bb30cee16c7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c6ede20e6f597abf4b3f6bb30cee16c7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c6ede20e6f597abf4b3f6bb30cee16c7-Reviews.html", "metareview": "", "pdf_size": 322661, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16707744327040034989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Duke University, Computer Science Department; Princeton University, Mathematics Department; Massachusetts Institute of Technology, Applied Mathematics and IDSS", "aff_domain": "cs.duke.edu;princeton.edu;mit.edu", "email": "cs.duke.edu;princeton.edu;mit.edu", "github": "", "project": "http://tiny.cc/glr17", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c6ede20e6f597abf4b3f6bb30cee16c7-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Duke University;Princeton University;Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science Department;Mathematics Department;Applied Mathematics and IDSS", "aff_unique_url": "https://www.duke.edu;https://www.princeton.edu;https://www.mit.edu", "aff_unique_abbr": "Duke;Princeton;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bias and Generalization in Deep Generative Models: An Empirical Study", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12020", "id": "12020", "author_site": "Shengjia Zhao, Hongyu Ren, Arianna Yuan, Jiaming Song, Noah Goodman, Stefano Ermon", "author": "Shengjia Zhao; Hongyu Ren; Arianna Yuan; Jiaming Song; Noah Goodman; Stefano Ermon", "abstract": "In high dimensional settings, density estimation algorithms rely crucially on their inductive bias. Despite recent empirical success, the inductive bias of deep generative models is not well understood. In this paper we propose a framework to systematically investigate bias and generalization in deep generative models of images by probing the learning algorithm with carefully designed training datasets. By measuring properties of the learned distribution, we are able to find interesting patterns of generalization. We verify that these patterns are consistent across datasets, common models and architectures.", "bibtex": "@inproceedings{NEURIPS2018_5317b679,\n author = {Zhao, Shengjia and Ren, Hongyu and Yuan, Arianna and Song, Jiaming and Goodman, Noah and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bias and Generalization in Deep Generative Models: An Empirical Study},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5317b6799188715d5e00a638a4278901-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5317b6799188715d5e00a638a4278901-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5317b6799188715d5e00a638a4278901-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5317b6799188715d5e00a638a4278901-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5317b6799188715d5e00a638a4278901-Reviews.html", "metareview": "", "pdf_size": 1033599, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17301681294706446940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Stanford University; Stanford University; Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5317b6799188715d5e00a638a4278901-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Bilevel Distance Metric Learning for Robust Image Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11416", "id": "11416", "author_site": "Jie Xu, Lei Luo, Cheng Deng, Heng Huang", "author": "Jie Xu; Lei Luo; Cheng Deng; Heng Huang", "abstract": "Metric learning, aiming to learn a discriminative Mahalanobis distance matrix M that can effectively reflect the similarity between data samples, has been widely studied in various image recognition problems. Most of the existing metric learning methods input the features extracted directly from the original data in the preprocessing phase. What's worse, these features usually take no account of the local geometrical structure of the data or the noise present in the data, and thus may not be optimal for the subsequent metric learning task. In this paper, we integrate both feature extraction and metric learning into one joint optimization framework and propose a new bilevel distance metric learning model. Specifically, the lower level characterizes the intrinsic data structure using graph regularized sparse coefficients, while the upper level forces the data samples from the same class to be close to each other and pushes those from different classes far away. \n In addition, leveraging the KKT conditions and the alternating direction method (ADM), we derive an efficient algorithm to solve the proposed new model. Extensive experiments on various occluded datasets demonstrate the effectiveness and robustness of our method.", "bibtex": "@inproceedings{NEURIPS2018_814a9c18,\n author = {Xu, Jie and Luo, Lei and Deng, Cheng and Huang, Heng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bilevel Distance Metric Learning for Robust Image Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/814a9c18f5abff398787c9cfcbf3d80c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/814a9c18f5abff398787c9cfcbf3d80c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/814a9c18f5abff398787c9cfcbf3d80c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/814a9c18f5abff398787c9cfcbf3d80c-Reviews.html", "metareview": "", "pdf_size": 684930, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6276842131147685661&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "School of Electronic Engineering, Xidian University; Electrical and Computer Engineering, University of Pittsburgh; School of Electronic Engineering, Xidian University + Electrical and Computer Engineering, University of Pittsburgh; Electrical and Computer Engineering, University of Pittsburgh + JDDGlobal.com", "aff_domain": "pitt.edu;pitt.edu;gmail.com;pitt.edu", "email": "pitt.edu;pitt.edu;gmail.com;pitt.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/814a9c18f5abff398787c9cfcbf3d80c-Abstract.html", "aff_unique_index": "0;1;0+1;1+2", "aff_unique_norm": "Xidian University;University of Pittsburgh;JDD Global", "aff_unique_dep": "School of Electronic Engineering;Electrical and Computer Engineering;", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.pitt.edu;", "aff_unique_abbr": "Xidian;Pitt;", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0+1;1", "aff_country_unique": "China;United States;" }, { "title": "Bilevel learning of the Group Lasso structure", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11794", "id": "11794", "author_site": "Jordan Frecon, Saverio Salzo, Massimiliano Pontil", "author": "Jordan Frecon; Saverio Salzo; Massimiliano Pontil", "abstract": "Regression with group-sparsity penalty plays a central role in high-dimensional prediction problems. Most existing methods require the group structure to be known a priori. In practice, this may be too strong an assumption, potentially hampering the effectiveness of the regularization method. To circumvent this issue, we present a method to estimate the group structure by means of a continuous bilevel optimization problem where the data is split into training and validation sets. Our approach relies on an approximation scheme where the lower level problem is replaced by a smooth dual forward-backward algorithm with Bregman distances. We provide guarantees regarding the convergence of the approximate procedure to the exact problem and demonstrate the good behaviour of the proposed method on synthetic experiments. Finally, a preliminary application to gene expression data is tackled with the purpose of unveiling functional groups.", "bibtex": "@inproceedings{NEURIPS2018_56bd37d3,\n author = {Frecon, Jordan and Salzo, Saverio and Pontil, Massimiliano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bilevel learning of the Group Lasso structure},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/56bd37d3a2fda0f2f41925019c81011d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/56bd37d3a2fda0f2f41925019c81011d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/56bd37d3a2fda0f2f41925019c81011d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/56bd37d3a2fda0f2f41925019c81011d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/56bd37d3a2fda0f2f41925019c81011d-Reviews.html", "metareview": "", "pdf_size": 801670, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14737374135680560733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Computational Statistics and Machine Learning, Istituto Italiano di Tecnologia (Italy)+Department of Computer Science, University College London (UK); Computational Statistics and Machine Learning, Istituto Italiano di Tecnologia (Italy)+Department of Computer Science, University College London (UK); Computational Statistics and Machine Learning, Istituto Italiano di Tecnologia (Italy)+Department of Computer Science, University College London (UK)", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/56bd37d3a2fda0f2f41925019c81011d-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "Istituto Italiano di Tecnologia;University College London", "aff_unique_dep": "Computational Statistics and Machine Learning;Department of Computer Science", "aff_unique_url": "https://www.iit.it;https://www.ucl.ac.uk", "aff_unique_abbr": "IIT;UCL", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0+1;0+1", "aff_country_unique": "Italy;United Kingdom" }, { "title": "Bilinear Attention Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11171", "id": "11171", "author_site": "Jin-Hwa Kim, Jaehyun Jun, Byoung-Tak Zhang", "author": "Jin-Hwa Kim; Jaehyun Jun; Byoung-Tak Zhang", "abstract": "Attention networks in multimodal learning provide an efficient way to utilize given visual information selectively. However, the computational cost to learn attention distributions for every pair of multimodal input channels is prohibitively expensive. To solve this problem, co-attention builds two separate attention distributions for each modality neglecting the interaction between multimodal inputs. In this paper, we propose bilinear attention networks (BAN) that find bilinear attention distributions to utilize given vision-language information seamlessly. BAN considers bilinear interactions among two groups of input channels, while low-rank bilinear pooling extracts the joint representations for each pair of channels. Furthermore, we propose a variant of multimodal residual networks to exploit eight-attention maps of the BAN efficiently. 
We quantitatively and qualitatively evaluate our model on the visual question answering (VQA 2.0) and Flickr30k Entities datasets, showing that BAN significantly outperforms previous methods and achieves new state-of-the-art results on both datasets.", "bibtex": "@inproceedings{NEURIPS2018_96ea64f3,\n author = {Kim, Jin-Hwa and Jun, Jaehyun and Zhang, Byoung-Tak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bilinear Attention Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/96ea64f3a1aa2fd00c72faacf0cb8ac9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/96ea64f3a1aa2fd00c72faacf0cb8ac9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/96ea64f3a1aa2fd00c72faacf0cb8ac9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/96ea64f3a1aa2fd00c72faacf0cb8ac9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/96ea64f3a1aa2fd00c72faacf0cb8ac9-Reviews.html", "metareview": "", "pdf_size": 1706445, "gs_citation": 1153, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10383181412923835294&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "SK T-Brain; Seoul National University+Surromind Robotics; Seoul National University+Surromind Robotics", "aff_domain": "sktbrain.com;bi.snu.ac.kr;bi.snu.ac.kr", "email": "sktbrain.com;bi.snu.ac.kr;bi.snu.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/96ea64f3a1aa2fd00c72faacf0cb8ac9-Abstract.html", "aff_unique_index": "0;1+2;1+2", "aff_unique_norm": "SK T-Brain;Seoul National University;Surromind Robotics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sktelecom.com/en/tbrain;https://www.snu.ac.kr;", "aff_unique_abbr": "SK T-Brain;SNU;", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea;" }, { "title": "BinGAN: Learning Compact Binary Descriptors with a Regularized GAN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11361", "id": "11361", "author_site": "Maciej Zieba, Piotr Semberecki, Tarek El-Gaaly, Tomasz Trzcinski", "author": "Maciej Zieba; Piotr Semberecki; Tarek El-Gaaly; Tomasz Trzcinski", "abstract": "In this paper, we propose a novel regularization method for Generative Adversarial Networks that allows the model to learn discriminative yet compact binary representations of image patches (image descriptors). We exploit the dimensionality reduction that takes place in the intermediate layers of the discriminator network and train the binarized penultimate layer's low-dimensional representation to mimic the distribution of the higher-dimensional preceding layers. To achieve this, we introduce two loss terms that aim at: (i) reducing the correlation between the dimensions of the binarized penultimate layer's low-dimensional representation (i.e. maximizing joint entropy) and (ii) propagating the relations between the dimensions in the high-dimensional space to the low-dimensional space. 
We evaluate the resulting binary image descriptors on two challenging applications, image matching and retrieval, where they achieve state-of-the-art results.", "bibtex": "@inproceedings{NEURIPS2018_f442d33f,\n author = {Zieba, Maciej and Semberecki, Piotr and El-Gaaly, Tarek and Trzcinski, Tomasz},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {BinGAN: Learning Compact Binary Descriptors with a Regularized GAN},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f442d33fa06832082290ad8544a8da27-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f442d33fa06832082290ad8544a8da27-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f442d33fa06832082290ad8544a8da27-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f442d33fa06832082290ad8544a8da27-Reviews.html", "metareview": "", "pdf_size": 1612353, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7540991992898429437&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Wroclaw University of Science and Technology + Tooploox; Wroclaw University of Science and Technology + Tooploox; Voyage; Warsaw University of Technology + Tooploox", "aff_domain": "pwr.edu.pl;pwr.edu.pl;voyage.auto;ii.pw.edu.pl", "email": "pwr.edu.pl;pwr.edu.pl;voyage.auto;ii.pw.edu.pl", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f442d33fa06832082290ad8544a8da27-Abstract.html", "aff_unique_index": "0+1;0+1;2;3+1", "aff_unique_norm": "Wroclaw University of Science and Technology;Tooploox;Voyage;Warsaw University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.pwr.edu.pl;https://www.tooploox.com;;https://www.pw.edu.pl", "aff_unique_abbr": "WUST;;;WUT", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "Poland;" }, { "title": "Binary Classification from Positive-Confidence Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11575", "id": "11575", "author_site": "Takashi Ishida, Gang Niu, Masashi Sugiyama", "author": "Takashi Ishida; Gang Niu; Masashi Sugiyama", "abstract": "Can we learn a binary classifier from only positive data, without any negative data or unlabeled data? We show that if one can equip positive data with confidence (positive-confidence), one can successfully learn a binary classifier, which we name positive-confidence (Pconf) classification. Our work is related to one-class classification, which is aimed at \"describing\" the positive class by clustering-related methods, but one-class classification does not have the ability to tune hyper-parameters and its aim is not to \"discriminate\" between positive and negative classes. For the Pconf classification problem, we provide a simple empirical risk minimization framework that is model-independent and optimization-independent. 
We theoretically establish the consistency and an estimation error bound, and demonstrate the usefulness of the proposed method for training deep neural networks through experiments.", "bibtex": "@inproceedings{NEURIPS2018_bd135462,\n author = {Ishida, Takashi and Niu, Gang and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Binary Classification from Positive-Confidence Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bd1354624fbae3b2149878941c60df99-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bd1354624fbae3b2149878941c60df99-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/bd1354624fbae3b2149878941c60df99-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bd1354624fbae3b2149878941c60df99-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bd1354624fbae3b2149878941c60df99-Reviews.html", "metareview": "", "pdf_size": 1075639, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10725870998628923240&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "The University of Tokyo, Tokyo, Japan + RIKEN, Tokyo, Japan; RIKEN, Tokyo, Japan; The University of Tokyo, Tokyo, Japan + RIKEN, Tokyo, Japan", "aff_domain": "ms.k.u-tokyo.ac.jp;riken.jp;k.u-tokyo.ac.jp", "email": "ms.k.u-tokyo.ac.jp;riken.jp;k.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bd1354624fbae3b2149878941c60df99-Abstract.html", "aff_unique_index": "0+1;1;0+1", "aff_unique_norm": "University of Tokyo;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.riken.jp", "aff_unique_abbr": "UTokyo;RIKEN", "aff_campus_unique_index": "0+0;0;0+0", "aff_campus_unique": "Tokyo", "aff_country_unique_index": "0+0;0;0+0", "aff_country_unique": "Japan" }, { "title": "Binary Rating Estimation with Graph Side Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11423", "id": "11423", "author_site": "Kwangjun Ahn, Kangwook Lee, Hyunseung Cha, Changho Suh", "author": "Kwangjun Ahn; Kangwook Lee; Hyunseung Cha; Changho Suh", "abstract": "Rich experimental evidence shows that one can better estimate users' unknown ratings with the aid of graph side information such as social graphs. However, the gain is not theoretically quantified. In this work, we study the binary rating estimation problem to understand the fundamental value of graph side information. Considering a simple correlation model between a rating matrix and a graph, we characterize the sharp threshold on the number of observed entries required to recover the rating matrix (called the optimal sample complexity) as a function of the quality of graph side information (to be detailed). To the best of our knowledge, we are the first to reveal how much the graph side information reduces sample complexity. Further, we propose a computationally efficient algorithm that achieves the limit. 
Our experimental results demonstrate that the algorithm performs well even with real-world graphs.", "bibtex": "@inproceedings{NEURIPS2018_0b1ec366,\n author = {Ahn, Kwangjun and Lee, Kangwook and Cha, Hyunseung and Suh, Changho},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Binary Rating Estimation with Graph Side Information},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0b1ec366924b26fc98fa7b71a9c249cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0b1ec366924b26fc98fa7b71a9c249cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0b1ec366924b26fc98fa7b71a9c249cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0b1ec366924b26fc98fa7b71a9c249cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0b1ec366924b26fc98fa7b71a9c249cf-Reviews.html", "metareview": "", "pdf_size": 492656, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1420831281248167662&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "142nd Military Police Company+Korean Augmentation To the United States Army; School of Electrical Engineering, KAIST; Kakao Brain; School of Electrical Engineering, KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kakaobrain.com;kaist.ac.kr", "email": "kaist.ac.kr;kaist.ac.kr;kakaobrain.com;kaist.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0b1ec366924b26fc98fa7b71a9c249cf-Abstract.html", "aff_unique_index": "0+1;2;3;2", "aff_unique_norm": "142nd Military Police Company;United States Army;KAIST;Kakao Brain", "aff_unique_dep": "Military Police;Korean Augmentation;School of Electrical Engineering;", "aff_unique_url": ";https://www.army.mil;https://www.kaist.ac.kr;https://brain.kakao.com", "aff_unique_abbr": ";US Army;KAIST;Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;1;1;1", "aff_country_unique": "United States;South Korea" }, { "title": "Bipartite Stochastic Block Models with Tiny Clusters", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11385", "id": "11385", "author": "Stefan Neumann", "abstract": "We study the problem of finding clusters in random bipartite graphs. We present a simple two-step algorithm which provably finds even tiny clusters of size $O(n^\\epsilon)$, where $n$ is the number of vertices in the graph and $\\epsilon > 0$. Previous algorithms were only able to identify clusters of size $\\Omega(\\sqrt{n})$. We evaluate the algorithm on synthetic and on real-world data; the experiments show that the algorithm can find extremely small clusters even in presence of high destructive noise.", "bibtex": "@inproceedings{NEURIPS2018_ab731488,\n author = {Neumann, Stefan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bipartite Stochastic Block Models with Tiny Clusters},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ab7314887865c4265e896c6e209d1cd6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ab7314887865c4265e896c6e209d1cd6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ab7314887865c4265e896c6e209d1cd6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ab7314887865c4265e896c6e209d1cd6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ab7314887865c4265e896c6e209d1cd6-Reviews.html", "metareview": "", "pdf_size": 741935, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17783110520995544555&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ab7314887865c4265e896c6e209d1cd6-Abstract.html" }, { "title": "Blind Deconvolutional Phase Retrieval via Convex Programming", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11951", "id": "11951", "author_site": "Ali Ahmed, Alireza Aghasi, Paul Hand", "author": "Ali Ahmed; Alireza Aghasi; Paul Hand", "abstract": "We consider the task of recovering two real or complex $m$-vectors from phaseless Fourier measurements of their circular convolution. Our method is a novel convex relaxation that is based on a lifted matrix recovery formulation that allows a nontrivial convex relaxation of the bilinear measurements from convolution. We prove that if the two signals belong to known random subspaces of dimensions $k$ and $n$, then they can be recovered up to the inherent scaling ambiguity with $m \\gg (k+n) \\log^2 m$ phaseless measurements. Our method provides the first theoretical recovery guarantee for this problem by a computationally efficient algorithm and does not require a solution estimate to be computed for initialization. Our proof is based on Rademacher complexity estimates. Additionally, we provide an ADMM implementation of the method and provide numerical experiments that verify the theory.", "bibtex": "@inproceedings{NEURIPS2018_d5b3d8da,\n author = {Ahmed, Ali and Aghasi, Alireza and Hand, Paul},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Blind Deconvolutional Phase Retrieval via Convex Programming},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d5b3d8dadd770c460b1cde910a711987-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d5b3d8dadd770c460b1cde910a711987-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d5b3d8dadd770c460b1cde910a711987-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d5b3d8dadd770c460b1cde910a711987-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d5b3d8dadd770c460b1cde910a711987-Reviews.html", "metareview": "", "pdf_size": 1247654, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16166042146163169134&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Electrical Engineering, Information Technology University, Lahore, Pakistan; Department of Business Analytics, Georgia State University, Atlanta, GA; College of Computer and Information Science, Northeastern University, Boston, MA", "aff_domain": "itu.edu.pk;gsu.edu;northeastern.edu", "email": "itu.edu.pk;gsu.edu;northeastern.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d5b3d8dadd770c460b1cde910a711987-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Information Technology University;Georgia State University;Northeastern University", "aff_unique_dep": "Department of Electrical Engineering;Department of Business Analytics;College of Computer and Information Science", "aff_unique_url": "https://www.itu.edu.pk;https://www.gsu.edu;https://www.northeastern.edu", "aff_unique_abbr": ";GSU;NEU", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Lahore;Atlanta;Boston", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Pakistan;United States" }, { "title": "Blockwise Parallel Decoding for Deep Autoregressive Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11956", "id": "11956", "author_site": "Mitchell Stern, Noam Shazeer, Jakob Uszkoreit", "author": "Mitchell Stern; Noam Shazeer; Jakob Uszkoreit", "abstract": "Deep autoregressive sequence-to-sequence models have demonstrated impressive performance across a wide variety of tasks in recent years. While common architecture classes such as recurrent, convolutional, and self-attention networks make different trade-offs between the amount of computation needed per layer and the length of the critical path at training time, generation still remains an inherently sequential process. To overcome this limitation, we propose a novel blockwise parallel decoding scheme in which we make predictions for multiple time steps in parallel then back off to the longest prefix validated by a scoring model. This allows for substantial theoretical improvements in generation speed when applied to architectures that can process output sequences in parallel. We verify our approach empirically through a series of experiments using state-of-the-art self-attention models for machine translation and image super-resolution, achieving iteration reductions of up to 2x over a baseline greedy decoder with no loss in quality, or up to 7x in exchange for a slight decrease in performance. 
In terms of wall-clock time, our fastest models exhibit real-time speedups of up to 4x over standard greedy decoding.", "bibtex": "@inproceedings{NEURIPS2018_c4127b91,\n author = {Stern, Mitchell and Shazeer, Noam and Uszkoreit, Jakob},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Blockwise Parallel Decoding for Deep Autoregressive Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c4127b9194fe8562c64dc0f5bf2c93bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c4127b9194fe8562c64dc0f5bf2c93bc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c4127b9194fe8562c64dc0f5bf2c93bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c4127b9194fe8562c64dc0f5bf2c93bc-Reviews.html", "metareview": "", "pdf_size": 382092, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6466621361308032943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; Google Brain; Google Brain", "aff_domain": "berkeley.edu;google.com;google.com", "email": "berkeley.edu;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c4127b9194fe8562c64dc0f5bf2c93bc-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.berkeley.edu;https://brain.google.com", "aff_unique_abbr": "UC Berkeley;Google Brain", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Boolean Decision Rules via Column Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11458", "id": "11458", "author_site": "Sanjeeb Dash, Oktay Gunluk, Dennis Wei", "author": "Sanjeeb Dash; Oktay Gunluk; Dennis Wei", "abstract": "This paper considers the learning of Boolean rules in either disjunctive normal form (DNF, OR-of-ANDs, equivalent to decision rule sets) or conjunctive normal form (CNF, AND-of-ORs) as an interpretable model for classification. An integer program is formulated to optimally trade classification accuracy for rule simplicity. Column generation (CG) is used to efficiently search over an exponential number of candidate clauses (conjunctions or disjunctions) without the need for heuristic rule mining. This approach also bounds the gap between the selected rule set and the best possible rule set on the training data. To handle large datasets, we propose an approximate CG algorithm using randomization. Compared to three recently proposed alternatives, the CG algorithm dominates the accuracy-simplicity trade-off in 8 out of 16 datasets. When maximized for accuracy, CG is competitive with rule learners designed for this purpose, sometimes finding significantly simpler solutions that are no less accurate.", "bibtex": "@inproceedings{NEURIPS2018_743394be,\n author = {Dash, Sanjeeb and Gunluk, Oktay and Wei, Dennis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Boolean Decision Rules via Column Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Reviews.html", "metareview": "", "pdf_size": 651864, "gs_citation": 241, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=936799637404204681&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/743394beff4b1282ba735e5e3723ed74-Abstract.html" }, { "title": "Boosted Sparse and Low-Rank Tensor Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11121", "id": "11121", "author_site": "Jun Yu, Kun Chen, Wanwan Xu, Jiayu Zhou, Fei Wang", "author": "Lifang He; Kun Chen; Wanwan Xu; Jiayu Zhou; Fei Wang", "abstract": "We propose a sparse and low-rank tensor regression model to relate a univariate outcome to a feature tensor, in which each unit-rank tensor from the CP decomposition of the coefficient tensor is assumed to be sparse. This structure is both parsimonious and highly interpretable, as it implies that the outcome is related to the features through a few distinct pathways, each of which may only involve subsets of feature dimensions. We take a divide-and-conquer strategy to simplify the task into a set of sparse unit-rank tensor regression problems. To make the computation efficient and scalable, for the unit-rank tensor regression, we propose a stagewise estimation procedure to efficiently trace out its entire solution path. We show that as the step size goes to zero, the stagewise solution paths converge exactly to those of the corresponding regularized regression. The superior performance of our approach is demonstrated on various real-world and synthetic examples.", "bibtex": "@inproceedings{NEURIPS2018_8d34201a,\n author = {He, Lifang and Chen, Kun and Xu, Wanwan and Zhou, Jiayu and Wang, Fei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Boosted Sparse and Low-Rank Tensor Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8d34201a5b85900908db6cae92723617-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8d34201a5b85900908db6cae92723617-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8d34201a5b85900908db6cae92723617-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8d34201a5b85900908db6cae92723617-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8d34201a5b85900908db6cae92723617-Reviews.html", "metareview": "", "pdf_size": 567158, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13402681948996325867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Weill Cornell Medicine; University of Connecticut; University of Connecticut; Michigan State University; Weill Cornell Medicine", "aff_domain": "gmail.com;uconn.edu;uconn.edu;gmail.com;med.cornell.edu", "email": "gmail.com;uconn.edu;uconn.edu;gmail.com;med.cornell.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8d34201a5b85900908db6cae92723617-Abstract.html", "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "Weill Cornell Medicine;University of Connecticut;Michigan State University", "aff_unique_dep": ";;", "aff_unique_url": "https://weill.cornell.edu;https://www.uconn.edu;https://www.msu.edu", "aff_unique_abbr": "WCM;UConn;MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Boosting Black Box Variational Inference", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11342", "id": "11342", "author_site": "Francesco Locatello, Gideon Dresdner, Rajiv Khanna, Isabel Valera, Gunnar Ratsch", "author": "Francesco Locatello; Gideon Dresdner; Rajiv Khanna; Isabel Valera; Gunnar Raetsch", "abstract": "Approximating a probability density in a tractable manner is a central task in Bayesian statistics. Variational Inference (VI) is a popular technique that achieves tractability by choosing a relatively simple variational approximation. Borrowing ideas from the classic boosting framework, recent approaches attempt to \\emph{boost} VI by replacing the selection of a single density with an iteratively constructed mixture of densities. In order to guarantee convergence, previous works impose stringent assumptions that require significant effort for practitioners. Specifically, they require a custom implementation of the greedy step (called the LMO) for every probabilistic model with respect to an unnatural variational family of truncated distributions. Our work fixes these issues with novel theoretical and algorithmic insights. On the theoretical side, we show that boosting VI satisfies a relaxed smoothness assumption which is sufficient for the convergence of the functional Frank-Wolfe (FW) algorithm. Furthermore, we rephrase the LMO problem and propose to maximize the Residual ELBO (RELBO) which replaces the standard ELBO optimization in VI. These theoretical enhancements allow for black box implementation of the boosting subroutine. 
Finally, we present a stopping criterion drawn from the duality gap in the classic FW analyses and exhaustive experiments to illustrate the usefulness of our theoretical and algorithmic contributions.", "bibtex": "@inproceedings{NEURIPS2018_74563ba2,\n author = {Locatello, Francesco and Dresdner, Gideon and Khanna, Rajiv and Valera, Isabel and Raetsch, Gunnar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Boosting Black Box Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/74563ba21a90da13dacf2a73e3ddefa7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/74563ba21a90da13dacf2a73e3ddefa7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/74563ba21a90da13dacf2a73e3ddefa7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/74563ba21a90da13dacf2a73e3ddefa7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/74563ba21a90da13dacf2a73e3ddefa7-Reviews.html", "metareview": "", "pdf_size": 462393, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=493456481295082921&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Max-Planck Institute for Intelligent Systems, Germany+Dept. for Computer Science, ETH Zurich, Universit\u00e4tsstrasse 6, 8092 Zurich, Switzerland; Dept. for Computer Science, ETH Zurich, Universit\u00e4tsstrasse 6, 8092 Zurich, Switzerland; The University of Texas at Austin, USA; Max-Planck Institute for Intelligent Systems, Germany; Dept. for Computer Science, ETH Zurich, Universit\u00e4tsstrasse 6, 8092 Zurich, Switzerland", "aff_domain": "; ; ; ; ", "email": "; ; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/74563ba21a90da13dacf2a73e3ddefa7-Abstract.html", "aff_unique_index": "0+1;1;2;0;1", "aff_unique_norm": "Max-Planck Institute for Intelligent Systems;ETH Zurich;University of Texas at Austin", "aff_unique_dep": ";Dept. 
for Computer Science;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ethz.ch;https://www.utexas.edu", "aff_unique_abbr": "MPI-IS;ETHZ;UT Austin", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Zurich;Austin", "aff_country_unique_index": "0+1;1;2;0;1", "aff_country_unique": "Germany;Switzerland;United States" }, { "title": "Bounded-Loss Private Prediction Markets", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11987", "id": "11987", "author_site": "Rafael Frongillo, Bo Waggoner", "author": "Rafael Frongillo; Bo Waggoner", "abstract": "Prior work has investigated variations of prediction markets that preserve participants' (differential) privacy, which formed the basis of useful mechanisms for purchasing data for machine learning objectives.\n Such markets required potentially unlimited financial subsidy, however, making them impractical.\n In this work, we design an adaptively-growing prediction market with a bounded financial subsidy, while achieving privacy, incentives to produce accurate predictions, and precision in the sense that market prices are\n not heavily impacted by the added privacy-preserving noise.\n We briefly discuss how our mechanism can extend to the data-purchasing setting, and its relationship to traditional learning algorithms.", "bibtex": "@inproceedings{NEURIPS2018_b2dc4495,\n author = {Frongillo, Rafael and Waggoner, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bounded-Loss Private Prediction Markets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b2dc449578a4744a1684d3b0ea933702-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b2dc449578a4744a1684d3b0ea933702-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b2dc449578a4744a1684d3b0ea933702-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b2dc449578a4744a1684d3b0ea933702-Reviews.html", "metareview": "", "pdf_size": 329080, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2789940486672377775&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Colorado Boulder; Microsoft Research", "aff_domain": "colorado.edu;colorado.edu", "email": "colorado.edu;colorado.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b2dc449578a4744a1684d3b0ea933702-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Colorado Boulder;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.colorado.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CU Boulder;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Boulder;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "BourGAN: Generative Networks with Metric Embeddings", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11237", "id": "11237", "author_site": "Chang Xiao, Peilin Zhong, Changxi Zheng", "author": "Chang Xiao; Peilin Zhong; Changxi Zheng", "abstract": "This paper addresses the mode collapse for generative adversarial networks (GANs). We view modes as a geometric structure of data distribution in a metric space. 
Under this geometric lens, we embed subsamples of the dataset from an arbitrary metric space into the L2 space, while preserving their pairwise distance distribution. Not only does this metric embedding determine the dimensionality of the latent space automatically, it also enables us to construct a mixture of Gaussians to draw latent space random vectors. We use the Gaussian mixture model in tandem with a simple augmentation of the objective function to train GANs. Every major step of our method is supported by theoretical analysis, and our experiments on real and synthetic data confirm that the generator is able to produce samples spreading over most of the modes while avoiding unwanted samples, outperforming several recent GAN variants on a number of metrics and offering new features.", "bibtex": "@inproceedings{NEURIPS2018_8a3363ab,\n author = {Xiao, Chang and Zhong, Peilin and Zheng, Changxi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {BourGAN: Generative Networks with Metric Embeddings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8a3363abe792db2d8761d6403605aeb7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8a3363abe792db2d8761d6403605aeb7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8a3363abe792db2d8761d6403605aeb7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8a3363abe792db2d8761d6403605aeb7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8a3363abe792db2d8761d6403605aeb7-Reviews.html", "metareview": "", "pdf_size": 6665766, "gs_citation": 83, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5094894076682723050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Columbia University; Columbia University; Columbia University", "aff_domain": "cs.columbia.edu;cs.columbia.edu;cs.columbia.edu", "email": "cs.columbia.edu;cs.columbia.edu;cs.columbia.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8a3363abe792db2d8761d6403605aeb7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Breaking the Activation Function Bottleneck through Adaptive Parameterization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11743", "id": "11743", "author_site": "Sebastian Flennerhag, Hujun Yin, John Keane, Mark Elliot", "author": "Sebastian Flennerhag; Hujun Yin; John Keane; Mark Elliot", "abstract": "Standard neural network architectures are non-linear only by virtue of a simple element-wise activation function, making them both brittle and excessively large. In this paper, we consider methods for making the feed-forward layer more flexible while preserving its basic structure. We develop simple drop-in replacements that learn to adapt their parameterization conditional on the input, thereby increasing statistical efficiency significantly. 
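The adaptive-parameterization idea above is easy to illustrate. Below is a minimal, hypothetical PyTorch sketch of a drop-in feed-forward layer whose effective parameterization is conditioned on each input via a small auxiliary network; the class name AdaptiveLinear and the rank-1 gating scheme are our own illustrative choices, not the paper's exact architecture.

```python
import torch
import torch.nn as nn

class AdaptiveLinear(nn.Module):
    """Feed-forward layer whose effective weights adapt to the input.

    A small auxiliary network emits per-example scales for the inputs and
    outputs of a base linear map, so the layer's parameterization is
    input-conditional (an illustrative sketch, not the paper's design).
    """

    def __init__(self, d_in, d_out, d_aux=16):
        super().__init__()
        self.base = nn.Linear(d_in, d_out)
        self.aux = nn.Sequential(
            nn.Linear(d_in, d_aux), nn.Tanh(),
            nn.Linear(d_aux, d_in + d_out),
        )

    def forward(self, x):
        s = self.aux(x)
        s_in, s_out = s.split([self.base.in_features,
                               self.base.out_features], dim=-1)
        # Equivalent to rescaling W's columns and rows per example,
        # without materializing a per-example weight matrix.
        return torch.sigmoid(s_out) * self.base(torch.sigmoid(s_in) * x)

layer = AdaptiveLinear(8, 4)
print(layer(torch.randn(32, 8)).shape)  # torch.Size([32, 4])
```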
We present an adaptive LSTM that advances the state of the art for the Penn Treebank and Wikitext-2 word-modeling tasks while using fewer parameters and converging in half as many iterations.", "bibtex": "@inproceedings{NEURIPS2018_cac8e130,\n author = {Flennerhag, Sebastian and Yin, Hujun and Keane, John and Elliot, Mark},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Breaking the Activation Function Bottleneck through Adaptive Parameterization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cac8e13055d2e4f62b6322254203b293-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cac8e13055d2e4f62b6322254203b293-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cac8e13055d2e4f62b6322254203b293-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cac8e13055d2e4f62b6322254203b293-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cac8e13055d2e4f62b6322254203b293-Reviews.html", "metareview": "", "pdf_size": 2985943, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=707894120541881868&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Manchester + The Alan Turing Institute; University of Manchester + The Alan Turing Institute; University of Manchester; University of Manchester", "aff_domain": "turing.ac.uk;manchester.ac.uk;manchester.ac.uk;manchester.ac.uk", "email": "turing.ac.uk;manchester.ac.uk;manchester.ac.uk;manchester.ac.uk", "github": "https://github.com/flennerhag/alstm", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cac8e13055d2e4f62b6322254203b293-Abstract.html", "aff_unique_index": "0+1;0+1;0;0", "aff_unique_norm": "University of Manchester;Alan Turing Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.manchester.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "UoM;ATI", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Breaking the Curse of Horizon: Infinite-Horizon Off-Policy Estimation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11523", "id": "11523", "author_site": "Qiang Liu, Lihong Li, Ziyang Tang, Denny Zhou", "author": "Qiang Liu; Lihong Li; Ziyang Tang; Dengyong Zhou", "abstract": "We consider the off-policy estimation problem of estimating the expected reward of a target policy using samples collected by a different behavior policy. Importance sampling (IS) has been a key technique to derive (nearly) unbiased estimators, but is known to suffer from an excessively high variance in long-horizon problems. In the extreme case of infinite-horizon problems, the variance of an IS-based estimator may even be unbounded. In this paper, we propose a new off-policy estimation method that applies IS directly on the stationary state-visitation distributions to avoid the exploding variance issue faced by existing estimators. Our key contribution is a novel approach to estimating the density ratio of two stationary distributions, with trajectories sampled from only the behavior distribution.
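As a toy illustration of the estimator sketched in this abstract, the snippet below assumes the stationary density ratio w(s) = d_pi(s)/d_mu(s) is already available (estimating it from behavior data alone is the paper's actual contribution) and reweights logged rewards by w(s) times the per-step action-probability ratio; the states, policies, and ratio values are all made-up placeholders.

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy logged data from a behavior policy mu over 5 states and 2 actions.
n = 10_000
states = rng.integers(0, 5, size=n)            # s_t ~ d_mu (stationary)
actions = rng.integers(0, 2, size=n)           # a_t ~ mu(.|s_t)
rewards = rng.normal(loc=0.1 * states, size=n)

# Assumed given: density-ratio estimates w(s) ~ d_pi(s) / d_mu(s) and the
# action probabilities of the target and behavior policies (placeholders).
w = np.array([0.5, 0.8, 1.0, 1.3, 1.6])
pi_a = np.full((5, 2), 0.5)                    # pi(a|s)
mu_a = np.full((5, 2), 0.5)                    # mu(a|s)

# Self-normalized estimate of the average reward under the target policy:
# each sample is weighted by w(s_t) * pi(a_t|s_t) / mu(a_t|s_t).
rho = w[states] * pi_a[states, actions] / mu_a[states, actions]
print(np.sum(rho * rewards) / np.sum(rho))
```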
We develop a mini-max loss function for the estimation problem, and derive a closed-form solution for the case of RKHS. We support our method with both theoretical and empirical analyses.", "bibtex": "@inproceedings{NEURIPS2018_dda04f9d,\n author = {Liu, Qiang and Li, Lihong and Tang, Ziyang and Zhou, Dengyong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Breaking the Curse of Horizon: Infinite-Horizon Off-Policy Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dda04f9d634145a9c68d5dfe53b21272-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dda04f9d634145a9c68d5dfe53b21272-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dda04f9d634145a9c68d5dfe53b21272-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dda04f9d634145a9c68d5dfe53b21272-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dda04f9d634145a9c68d5dfe53b21272-Reviews.html", "metareview": "", "pdf_size": 2428217, "gs_citation": 429, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5705264309372217453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "The University of Texas at Austin, Austin, TX, 78712; Google Brain, Kirkland, WA, 98033; The University of Texas at Austin, Austin, TX, 78712; Google Brain, Kirkland, WA, 98033", "aff_domain": "cs.utexas.edu;google.com;cs.utexas.edu;google.com", "email": "cs.utexas.edu;google.com;cs.utexas.edu;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dda04f9d634145a9c68d5dfe53b21272-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Texas at Austin;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.utexas.edu;https://brain.google.com", "aff_unique_abbr": "UT Austin;Google Brain", "aff_campus_unique_index": "0;1;0;1", "aff_campus_unique": "Austin;Kirkland", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Breaking the Span Assumption Yields Fast Finite-Sum Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11241", "id": "11241", "author_site": "Robert Hannah, Yanli Liu, Daniel O'Connor, Wotao Yin", "author": "Robert Hannah; Yanli Liu; Daniel O'Connor; Wotao Yin", "abstract": "In this paper, we show that SVRG and SARAH can be modified to be fundamentally faster than all of the other standard algorithms that minimize the sum of $n$ smooth functions, such as SAGA, SAG, SDCA, and SDCA without duality. Most finite sum algorithms follow what we call the ``span assumption'': Their updates are in the span of a sequence of component gradients chosen in a random IID fashion. In the big data regime, where the condition number $\\kappa=O(n)$, the span assumption prevents algorithms from converging to an approximate solution of accuracy $\\epsilon$ in less than $n\\ln(1/\\epsilon)$ iterations. SVRG and SARAH do not follow the span assumption since they are updated with a hybrid of full-gradient and component-gradient information. We show that because of this, they can be up to $\\Omega(1+(\\ln(n/\\kappa))_+)$ times faster. 
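To make the span-assumption contrast concrete, here is a generic SVRG sketch on a least-squares toy problem (the standard algorithm, not the modified variant the paper analyzes): each inner step combines a freshly sampled component gradient with full-gradient information anchored at a reference point, which is exactly why SVRG escapes the span assumption described above.

```python
import numpy as np

def svrg(grad_i, x0, n, lr=0.1, epochs=30):
    """Minimal SVRG: per epoch, anchor a full gradient at x_ref, then take
    variance-reduced steps g_i(x) - g_i(x_ref) + full_grad."""
    rng = np.random.default_rng(0)
    x = x0.copy()
    for _ in range(epochs):
        x_ref = x.copy()
        full_grad = np.mean([grad_i(x_ref, i) for i in range(n)], axis=0)
        for _ in range(n):
            i = rng.integers(n)
            # Hybrid of component- and full-gradient information.
            x -= lr * (grad_i(x, i) - grad_i(x_ref, i) + full_grad)
    return x

# f_i(x) = 0.5 * (a_i . x - b_i)^2, a finite-sum least-squares problem.
rng = np.random.default_rng(1)
A = rng.normal(size=(100, 5)) / np.sqrt(5)
b = rng.normal(size=100)
g = lambda x, i: (A[i] @ x - b[i]) * A[i]
x_hat = svrg(g, np.zeros(5), n=100)
print(np.linalg.norm(A.T @ (A @ x_hat - b)) / 100)  # ~0 at the optimum
```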
In particular, to obtain an accuracy $\\epsilon = 1/n^\\alpha$ for $\\kappa=n^\\beta$ and $\\alpha,\\beta\\in(0,1)$, modified SVRG requires $O(n)$ iterations, whereas algorithms that follow the span assumption require $O(n\\ln(n))$ iterations. Moreover, we present lower bound results that show this speedup is optimal, and provide analysis to help explain why this speedup exists. With the understanding that the span assumption is a point of weakness of finite sum algorithms, future work may purposefully exploit this to yield faster algorithms in the big data regime.", "bibtex": "@inproceedings{NEURIPS2018_abea47ba,\n author = {Hannah, Robert and Liu, Yanli and O\\textquotesingle Connor, Daniel and Yin, Wotao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Breaking the Span Assumption Yields Fast Finite-Sum Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/abea47ba24142ed16b7d8fbf2c740e0d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/abea47ba24142ed16b7d8fbf2c740e0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/abea47ba24142ed16b7d8fbf2c740e0d-Reviews.html", "metareview": "", "pdf_size": 680136, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14246662597072859407&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Mathematics, University of California, Los Angeles; Department of Mathematics, University of California, Los Angeles; Department of Mathematics, University of San Francisco; Department of Mathematics, University of California, Los Angeles", "aff_domain": "gmail.com;math.ucla.edu;gmail.com;math.ucla.edu", "email": "gmail.com;math.ucla.edu;gmail.com;math.ucla.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/abea47ba24142ed16b7d8fbf2c740e0d-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;University of San Francisco", "aff_unique_dep": "Department of Mathematics;Department of Mathematics", "aff_unique_url": "https://www.ucla.edu;https://www.usfca.edu", "aff_unique_abbr": "UCLA;USF", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Los Angeles;San Francisco", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "But How Does It Work in Theory? Linear SVM with Random Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11340", "id": "11340", "author_site": "Yitong Sun, Anna Gilbert, Ambuj Tewari", "author": "Yitong Sun; Anna Gilbert; Ambuj Tewari", "abstract": "We prove that, under low noise assumptions, the support vector machine with $N\\ll m$ random features (RFSVM) can achieve the learning rate faster than $O(1/\\sqrt{m})$ on a training set with $m$ samples when an optimized feature map is used. Our work extends the previous fast rate analysis of random features method from least square loss to 0-1 loss. 
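For readers who want to see the model class under study, below is a runnable RFSVM pipeline using scikit-learn's random Fourier features in front of a linear SVM. Note that RBFSampler draws a data-independent feature map, whereas the paper's fast rates require an optimized one, so this sketch shows only the plain baseline (dataset and hyperparameters are arbitrary).

```python
from sklearn.datasets import make_moons
from sklearn.kernel_approximation import RBFSampler
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# m = 1000 training samples, N = 50 << m random features.
X, y = make_moons(n_samples=2000, noise=0.1, random_state=0)
rfsvm = make_pipeline(
    RBFSampler(gamma=1.0, n_components=50, random_state=0),
    LinearSVC(C=1.0, max_iter=10_000),
)
rfsvm.fit(X[:1000], y[:1000])
print("test accuracy:", rfsvm.score(X[1000:], y[1000:]))
```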
We also show that the reweighted feature selection method, which approximates the optimized feature map, helps improve the performance of RFSVM in experiments on a synthetic data set.", "bibtex": "@inproceedings{NEURIPS2018_464d828b,\n author = {Sun, Yitong and Gilbert, Anna and Tewari, Ambuj},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {But How Does It Work in Theory? Linear SVM with Random Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/464d828b85b0bed98e80ade0a5c43b0f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/464d828b85b0bed98e80ade0a5c43b0f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/464d828b85b0bed98e80ade0a5c43b0f-Reviews.html", "metareview": "", "pdf_size": 357184, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2923305469042609420&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Mathematics, University of Michigan; Department of Mathematics, University of Michigan; Department of Statistics, University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "email": "umich.edu;umich.edu;umich.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/464d828b85b0bed98e80ade0a5c43b0f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Department of Mathematics", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Byzantine Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11454", "id": "11454", "author_site": "Dan Alistarh, Zeyuan Allen-Zhu, Jerry Li", "author": "Dan Alistarh; Zeyuan Allen-Zhu; Jerry Li", "abstract": "This paper studies the problem of distributed stochastic optimization in an adversarial setting where, out of $m$ machines which allegedly compute stochastic gradients every iteration, an $\\alpha$-fraction are Byzantine, and may behave adversarially. Our main result is a variant of stochastic gradient descent (SGD) which finds $\\varepsilon$-approximate minimizers of convex functions in $T = \\tilde{O}\\big( \\frac{1}{\\varepsilon^2 m} + \\frac{\\alpha^2}{\\varepsilon^2} \\big)$ iterations. In contrast, traditional mini-batch SGD needs $T = O\\big( \\frac{1}{\\varepsilon^2 m} \\big)$ iterations, but cannot tolerate Byzantine failures.\nFurther, we provide a lower bound showing that, up to logarithmic factors, our algorithm is information-theoretically optimal both in terms of sample complexity and time complexity.", "bibtex": "@inproceedings{NEURIPS2018_a07c2f3b,\n author = {Alistarh, Dan and Allen-Zhu, Zeyuan and Li, Jerry},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Byzantine Stochastic Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a07c2f3b3b907aaf8436a26c6d77f0a2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a07c2f3b3b907aaf8436a26c6d77f0a2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a07c2f3b3b907aaf8436a26c6d77f0a2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a07c2f3b3b907aaf8436a26c6d77f0a2-Reviews.html", "metareview": "", "pdf_size": 634483, "gs_citation": 364, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12003696575207963007&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "IST Austria; Microsoft Research AI; Simons Institute", "aff_domain": "ist.ac.at;csail.mit.edu;berkeley.edu", "email": "ist.ac.at;csail.mit.edu;berkeley.edu", "github": "", "project": "https://arxiv.org/abs/1803.08917", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a07c2f3b3b907aaf8436a26c6d77f0a2-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute of Science and Technology Austria;Microsoft;Simons Institute for the Theory of Computing", "aff_unique_dep": ";AI;", "aff_unique_url": "https://www.ist.ac.at;https://www.microsoft.com/en-us/research;https://simons.berkeley.edu", "aff_unique_abbr": "IST Austria;MSR;SITC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Austria;United States" }, { "title": "COLA: Decentralized Linear Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11447", "id": "11447", "author_site": "Lie He, Yatao Bian, Martin Jaggi", "author": "Lie He; An Bian; Martin Jaggi", "abstract": "Decentralized machine learning is a promising emerging paradigm in view of global challenges of data ownership and privacy. We consider learning of linear classification and regression models, in the setting where the training data is decentralized over many user devices, and the learning algorithm must run on-device, on an arbitrary communication network, without a central coordinator.\nWe propose COLA, a new decentralized training algorithm with strong theoretical guarantees and superior practical performance. Our framework overcomes many limitations of existing methods, and achieves communication efficiency, scalability, elasticity as well as resilience to changes in data and allows for unreliable and heterogeneous participating devices.", "bibtex": "@inproceedings{NEURIPS2018_05a70454,\n author = {He, Lie and Bian, An and Jaggi, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {COLA: Decentralized Linear Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/05a70454516ecd9194c293b0e415777f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/05a70454516ecd9194c293b0e415777f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/05a70454516ecd9194c293b0e415777f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/05a70454516ecd9194c293b0e415777f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/05a70454516ecd9194c293b0e415777f-Reviews.html", "metareview": "", "pdf_size": 988625, "gs_citation": 175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15790148886977326889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "EPFL; ETH Zurich; EPFL", "aff_domain": "epfl.ch;inf.ethz.ch;epfl.ch", "email": "epfl.ch;inf.ethz.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/05a70454516ecd9194c293b0e415777f-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "EPFL;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.ethz.ch", "aff_unique_abbr": "EPFL;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Can We Gain More from Orthogonality Regularizations in Training Deep Networks?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11422", "id": "11422", "author_site": "Nitin Bansal, Xiaohan Chen, Zhangyang Wang", "author": "Nitin Bansal; Xiaohan Chen; Zhangyang Wang", "abstract": "This paper seeks to answer the question: as the (near-) orthogonality of weights is found to be a favorable property for training deep convolutional neural networks, how can we enforce it in more effective and easy-to-use ways? We develop novel orthogonality regularizations on training deep CNNs, utilizing various advanced analytical tools such as mutual coherence and the restricted isometry property. These plug-and-play regularizations can be conveniently incorporated into training almost any CNN without extra hassle. We then benchmark their effects on state-of-the-art models: ResNet, WideResNet, and ResNeXt, on several of the most popular computer vision datasets: CIFAR-10, CIFAR-100, SVHN and ImageNet. We observe consistent performance gains after applying those proposed regularizations, in terms of both the final accuracies achieved, and faster and more stable convergence. We have made our codes and pre-trained models publicly available: https://github.com/nbansal90/Can-we-Gain-More-from-Orthogonality.", "bibtex": "@inproceedings{NEURIPS2018_bf424cb7,\n author = {Bansal, Nitin and Chen, Xiaohan and Wang, Zhangyang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Can We Gain More from Orthogonality Regularizations in Training Deep Networks?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bf424cb7b0dea050a42b9739eb261a3a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bf424cb7b0dea050a42b9739eb261a3a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bf424cb7b0dea050a42b9739eb261a3a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bf424cb7b0dea050a42b9739eb261a3a-Reviews.html", "metareview": "", "pdf_size": 1652692, "gs_citation": 393, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16253012284749788151&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "https://github.com/nbansal90/Can-we-Gain-More-from-Orthogonality", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bf424cb7b0dea050a42b9739eb261a3a-Abstract.html" }, { "title": "CapProNet: Deep Feature Learning via Orthogonal Projections onto Capsule Subspaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11566", "id": "11566", "author_site": "Liheng Zhang, Marzieh Edraki, Guo-Jun Qi", "author": "Liheng Zhang; Marzieh Edraki; Guo-Jun Qi", "abstract": "In this paper, we formalize the idea behind capsule nets of using a capsule vector rather than a neuron activation to predict the label of samples. To this end, we propose to learn a group of capsule subspaces onto which an input feature vector is projected. Then the lengths of resultant capsules are used to score the probability of belonging to different classes. We train such a Capsule Projection Network (CapProNet) by learning an orthogonal projection matrix for each capsule subspace, and show that each capsule subspace is updated until it contains input feature vectors corresponding to the associated class. With low dimensionality of capsule subspace as well as an iterative method to estimate the matrix inverse, only a small negligible computing overhead is incurred to train the network. Experiment results on image datasets show the presented network can greatly improve the performance of state-of-the-art Resnet backbones by $10-20\\%$ with almost the same computing cost.", "bibtex": "@inproceedings{NEURIPS2018_f5b1b89d,\n author = {Zhang, Liheng and Edraki, Marzieh and Qi, Guo-Jun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CapProNet: Deep Feature Learning via Orthogonal Projections onto Capsule Subspaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f5b1b89d98b7286673128a5fb112cb9a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f5b1b89d98b7286673128a5fb112cb9a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f5b1b89d98b7286673128a5fb112cb9a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f5b1b89d98b7286673128a5fb112cb9a-Reviews.html", "metareview": "", "pdf_size": 637137, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8590807570091055066&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "http://maple.cs.ucf.edu", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f5b1b89d98b7286673128a5fb112cb9a-Abstract.html" }, { "title": "CatBoost: unbiased boosting with categorical features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11641", "id": "11641", "author_site": "Liudmila Prokhorenkova, Gleb Gusev, Aleksandr Vorobev, Anna Veronika Dorogush, Andrey Gulin", "author": "Liudmila Prokhorenkova; Gleb Gusev; Aleksandr Vorobev; Anna Veronika Dorogush; Andrey Gulin", "abstract": "This paper presents the key algorithmic techniques behind CatBoost, a new gradient boosting toolkit. Their combination leads to CatBoost outperforming other publicly available boosting implementations in terms of quality on a variety of datasets. Two critical algorithmic advances introduced in CatBoost are the implementation of ordered boosting, a permutation-driven alternative to the classic algorithm, and an innovative algorithm for processing categorical features. Both techniques were created to fight a prediction shift caused by a special kind of target leakage present in all currently existing implementations of gradient boosting algorithms. In this paper, we provide a detailed analysis of this problem and demonstrate that proposed algorithms solve it effectively, leading to excellent empirical results.", "bibtex": "@inproceedings{NEURIPS2018_14491b75,\n author = {Prokhorenkova, Liudmila and Gusev, Gleb and Vorobev, Aleksandr and Dorogush, Anna Veronika and Gulin, Andrey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CatBoost: unbiased boosting with categorical features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/14491b756b3a51daac41c24863285549-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/14491b756b3a51daac41c24863285549-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/14491b756b3a51daac41c24863285549-Reviews.html", "metareview": "", "pdf_size": 553040, "gs_citation": 5655, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15125594264257209192&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Yandex, Moscow, Russia + Moscow Institute of Physics and Technology, Dolgoprudny, Russia; Yandex, Moscow, Russia + Moscow Institute of Physics and Technology, Dolgoprudny, Russia; Yandex, Moscow, Russia; Yandex, Moscow, Russia; Yandex, Moscow, Russia", "aff_domain": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru", "email": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru", "github": "https://github.com/catboost/catboost", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/14491b756b3a51daac41c24863285549-Abstract.html", "aff_unique_index": "0+1;0+1;0;0;0", "aff_unique_norm": "Yandex;Moscow Institute of Physics and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://yandex.com;https://www.mipt.ru", "aff_unique_abbr": "Yandex;MIPT", "aff_campus_unique_index": "0+1;0+1;0;0;0", "aff_campus_unique": "Moscow;Dolgoprudny", "aff_country_unique_index": "0+0;0+0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Causal Discovery from Discrete Data using Hidden Compact Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11274", "id": "11274", "author_site": "Ruichu Cai, Jie Qiao, Kun Zhang, Zhenjie Zhang, Zhifeng Hao", "author": "Ruichu Cai; Jie Qiao; Kun Zhang; Zhenjie Zhang; Zhifeng Hao", "abstract": "Causal discovery from a set of observations is one of the fundamental problems across several disciplines. For continuous variables, recently a number of causal discovery methods have demonstrated their effectiveness in distinguishing the cause from effect by exploring certain properties of the conditional distribution, but causal discovery on categorical data still remains a challenging problem, because it is generally not easy to find a compact description of the causal mechanism for the true causal direction. In this paper we attempt to solve this problem by assuming a two-stage causal process: the first stage maps the cause to a hidden variable of a lower cardinality, and the second stage generates the effect from the hidden representation. In this way, the causal mechanism admits a simple yet compact representation. We show that under this model, the causal direction is identifiable under some weak conditions on the true causal mechanism. We also provide an effective solution to recover the above hidden compact representation within the likelihood framework.
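The two-stage mechanism just described is simple to simulate. The sketch below generates discrete cause-effect pairs through a hypothetical hidden compact representation (all cardinalities and probability tables are invented): the mechanism is described by the deterministic map f plus a small conditional table, rather than a full P(Y|X) table.

```python
import numpy as np

rng = np.random.default_rng(0)

# Stage 1: deterministically compress the cause X (6 states) to a hidden
# variable Y' (2 states). Stage 2: draw the effect Y (4 states) from P(Y|Y').
f = np.array([0, 0, 1, 1, 1, 0])                 # X -> Y'
p_y_given_h = np.array([[0.7, 0.1, 0.1, 0.1],    # P(Y | Y'=0)
                        [0.1, 0.1, 0.2, 0.6]])   # P(Y | Y'=1)

x = rng.integers(0, 6, size=5000)                # cause
h = f[x]                                         # hidden compact representation
y = np.array([rng.choice(4, p=p_y_given_h[v]) for v in h])

# 6 values for f plus a 2x4 table describe the mechanism, versus a 6x4
# conditional table for a direct description of P(Y|X).
print(np.c_[x, h, y][:5])
```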
Empirical studies verify the effectiveness of the proposed approach on both synthetic and real-world data.", "bibtex": "@inproceedings{NEURIPS2018_8d3369c4,\n author = {Cai, Ruichu and Qiao, Jie and Zhang, Kun and Zhang, Zhenjie and Hao, Zhifeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal Discovery from Discrete Data using Hidden Compact Representation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8d3369c4c086f236fabf61d614a32818-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8d3369c4c086f236fabf61d614a32818-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8d3369c4c086f236fabf61d614a32818-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8d3369c4c086f236fabf61d614a32818-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8d3369c4c086f236fabf61d614a32818-Reviews.html", "metareview": "", "pdf_size": 327688, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2547222361550922358&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "School of Computer Science, Guangdong University of Technology, China; School of Computer Science, Guangdong University of Technology, China; Department of philosophy, Carnegie Mellon University; Singapore R&D, Yitu Technology Ltd.; School of Computer Science, Guangdong University of Technology, China + School of Mathematics and Big Data, Foshan University, China", "aff_domain": "gdut.edu.cn;gmail.com;andrew.cmu.edu;yitu-inc.com;gdut.edu.cn", "email": "gdut.edu.cn;gmail.com;andrew.cmu.edu;yitu-inc.com;gdut.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8d3369c4c086f236fabf61d614a32818-Abstract.html", "aff_unique_index": "0;0;1;2;0+3", "aff_unique_norm": "Guangdong University of Technology;Carnegie Mellon University;Yitu Technology Ltd.;Foshan University", "aff_unique_dep": "School of Computer Science;Department of Philosophy;Singapore R&D;School of Mathematics and Big Data", "aff_unique_url": ";https://www.cmu.edu;https://www.yitutech.com;", "aff_unique_abbr": ";CMU;Yitu Tech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0+0", "aff_country_unique": "China;United States;Singapore" }, { "title": "Causal Inference and Mechanism Clustering of A Mixture of Additive Noise Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11509", "id": "11509", "author_site": "Shoubo Hu, Zhitang Chen, Vahid Partovi Nia, Laiwan CHAN, Yanhui Geng", "author": "Shoubo Hu; Zhitang Chen; Vahid Partovi Nia; Laiwan CHAN; Yanhui Geng", "abstract": "The inference of the causal relationship between a pair of observed variables is a fundamental problem in science, and most existing approaches are based on one single causal model. In practice, however, observations are often collected from multiple sources with heterogeneous causal models due to certain uncontrollable factors, which renders causal analysis results obtained by a single model questionable. In this paper, we generalize the Additive Noise Model (ANM) to a mixture model, which consists of a finite number of ANMs, and provide the condition of its causal identifiability.
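For intuition, here is a toy simulation of the data-generating process this abstract considers: each observation is drawn from one of two additive noise models Y = f_k(X) + E sharing the same cause X (the two mechanisms below are arbitrary choices, not from the paper).

```python
import numpy as np

rng = np.random.default_rng(0)

n = 1000
k = rng.integers(0, 2, size=n)          # latent source of each observation
x = rng.uniform(-2, 2, size=n)          # cause
noise = 0.1 * rng.normal(size=n)        # additive noise E
y = np.where(k == 0, np.sin(x), 0.5 * x**2) + noise

# A single ANM fit to all (x, y) pairs conflates the two mechanisms; the
# paper clusters observations by mechanism while inferring the direction.
for j in (0, 1):
    print(f"mechanism {j}: corr(x, y) = {np.corrcoef(x[k==j], y[k==j])[0,1]:.2f}")
```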
To conduct model estimation, we propose the Gaussian Process Partially Observable Model (GPPOM), and incorporate independence enforcement into it to learn the latent parameters associated with each observation. Causal inference and clustering according to the underlying generating mechanisms of the mixture model are addressed in this work. Experiments on synthetic and real data demonstrate the effectiveness of our proposed approach.", "bibtex": "@inproceedings{NEURIPS2018_34766559,\n author = {Hu, Shoubo and Chen, Zhitang and Partovi Nia, Vahid and CHAN, Laiwan and Geng, Yanhui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal Inference and Mechanism Clustering of A Mixture of Additive Noise Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/347665597cbfaef834886adbb848011f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/347665597cbfaef834886adbb848011f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/347665597cbfaef834886adbb848011f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/347665597cbfaef834886adbb848011f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/347665597cbfaef834886adbb848011f-Reviews.html", "metareview": "", "pdf_size": 720398, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17153751836211673378&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/347665597cbfaef834886adbb848011f-Abstract.html" }, { "title": "Causal Inference via Kernel Deviance Measures", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11673", "id": "11673", "author_site": "Jovana Mitrovic, Dino Sejdinovic, Yee Whye Teh", "author": "Jovana Mitrovic; Dino Sejdinovic; Yee Whye Teh", "abstract": "Discovering the causal structure among a set of variables is a fundamental problem in many areas of science. In this paper, we propose Kernel Conditional Deviance for Causal Inference (KCDC), a fully nonparametric causal discovery method based on purely observational data. From a novel interpretation of the notion of asymmetry between cause and effect, we derive a corresponding asymmetry measure using the framework of reproducing kernel Hilbert spaces. Based on this, we propose three decision rules for causal discovery. We demonstrate the wide applicability and robustness of our method across a range of diverse synthetic datasets. Furthermore, we test our method on real-world time series data and the real-world benchmark dataset T\u00fcbingen Cause-Effect Pairs, where we outperform state-of-the-art approaches.", "bibtex": "@inproceedings{NEURIPS2018_73fed7fd,\n author = {Mitrovic, Jovana and Sejdinovic, Dino and Teh, Yee Whye},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal Inference via Kernel Deviance Measures},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/73fed7fd472e502d8908794430511f4d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/73fed7fd472e502d8908794430511f4d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/73fed7fd472e502d8908794430511f4d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/73fed7fd472e502d8908794430511f4d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/73fed7fd472e502d8908794430511f4d-Reviews.html", "metareview": "", "pdf_size": 353661, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14371501661623148966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Statistics, University of Oxford + DeepMind, UK; Department of Statistics, University of Oxford; Department of Statistics, University of Oxford + DeepMind, UK", "aff_domain": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "email": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/73fed7fd472e502d8908794430511f4d-Abstract.html", "aff_unique_index": "0+1;0;0+1", "aff_unique_norm": "University of Oxford;DeepMind", "aff_unique_dep": "Department of Statistics;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0+0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Causal Inference with Noisy and Missing Covariates via Matrix Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11667", "id": "11667", "author_site": "Nathan Kallus, Xiaojie Mao, Madeleine Udell", "author": "Nathan Kallus; Xiaojie Mao; Madeleine Udell", "abstract": "Valid causal inference in observational studies often requires controlling for confounders. However, in practice measurements of confounders may be noisy, and can lead to biased estimates of causal effects. We show that we can reduce bias induced by measurement noise using a large number of noisy measurements of the underlying confounders. We propose the use of matrix factorization to infer the confounders from noisy covariates. This flexible and principled framework adapts to missing values, accommodates a wide variety of data types, and can enhance a wide variety of causal inference methods. We bound the error for the induced average treatment effect estimator and show it is consistent in a linear regression setting, using Exponential Family Matrix Completion preprocessing. We demonstrate the effectiveness of the proposed procedure in numerical experiments with both synthetic data and real clinical data.", "bibtex": "@inproceedings{NEURIPS2018_86a1793f,\n author = {Kallus, Nathan and Mao, Xiaojie and Udell, Madeleine},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal Inference with Noisy and Missing Covariates via Matrix Factorization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/86a1793f65aeef4aeef4b479fc9b2bca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/86a1793f65aeef4aeef4b479fc9b2bca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/86a1793f65aeef4aeef4b479fc9b2bca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/86a1793f65aeef4aeef4b479fc9b2bca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/86a1793f65aeef4aeef4b479fc9b2bca-Reviews.html", "metareview": "", "pdf_size": 421164, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14104978633422349618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Cornell University; Cornell University; Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/86a1793f65aeef4aeef4b479fc9b2bca-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Chain of Reasoning for Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11053", "id": "11053", "author_site": "Chenfei Wu, Jinlai Liu, Xiaojie Wang, Xuan Dong", "author": "Chenfei Wu; Jinlai Liu; Xiaojie Wang; Xuan Dong", "abstract": "Reasoning plays an essential role in Visual Question Answering (VQA). Multi-step and dynamic reasoning is often necessary for answering complex questions. For example, a question \"What is placed next to the bus on the right of the picture?\" talks about a compound object \"bus on the right,\" which is generated by the relation", "bibtex": "@inproceedings{NEURIPS2018_31fefc0e,\n author = {Wu, Chenfei and Liu, Jinlai and Wang, Xiaojie and Dong, Xuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Chain of Reasoning for Visual Question Answering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/31fefc0e570cb3860f2a6d4b38c6490d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/31fefc0e570cb3860f2a6d4b38c6490d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/31fefc0e570cb3860f2a6d4b38c6490d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/31fefc0e570cb3860f2a6d4b38c6490d-Reviews.html", "metareview": "", "pdf_size": 4217544, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4092078810856262882&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Center for Intelligence Science and Technology; Center for Intelligence Science and Technology; Center for Intelligence Science and Technology; Center for Intelligence Science and Technology", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "email": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/31fefc0e570cb3860f2a6d4b38c6490d-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Center for Intelligence Science and Technology", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Chaining Mutual Information and Tightening Generalization Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11697", "id": "11697", "author_site": "Amir Asadi, Emmanuel Abbe, Sergio Verdu", "author": "Amir Asadi; Emmanuel Abbe; Sergio Verdu", "abstract": "Bounding the generalization error of learning algorithms has a long history, which yet falls short in explaining various generalization successes including those of deep learning. Two important difficulties are (i) exploiting the dependencies between the hypotheses, (ii) exploiting the dependence between the algorithm\u2019s input and output. Progress on the first point was made with the chaining method, originating from the work of Kolmogorov, and used in the VC-dimension bound. More recently, progress on the second point was made with the mutual information method by Russo and Zou \u201915. Yet, these two methods are currently disjoint. In this paper, we introduce a technique to combine chaining and mutual information methods, to obtain a generalization bound that is both algorithm-dependent and that exploits the dependencies between the hypotheses. We provide an example in which our bound significantly outperforms both the chaining and the mutual information bounds. As a corollary, we tighten Dudley\u2019s inequality when the learning algorithm chooses its output from a small subset of hypotheses with high probability.", "bibtex": "@inproceedings{NEURIPS2018_8d7628dd,\n author = {Asadi, Amir and Abbe, Emmanuel and Verdu, Sergio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Chaining Mutual Information and Tightening Generalization Bounds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8d7628dd7a710c8638dbd22d4421ee46-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8d7628dd7a710c8638dbd22d4421ee46-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8d7628dd7a710c8638dbd22d4421ee46-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8d7628dd7a710c8638dbd22d4421ee46-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8d7628dd7a710c8638dbd22d4421ee46-Reviews.html", "metareview": "", "pdf_size": 440657, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17896542891115925743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Princeton University; Princeton University + EPFL; Princeton University", "aff_domain": "princeton.edu; ; ", "email": "princeton.edu; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8d7628dd7a710c8638dbd22d4421ee46-Abstract.html", "aff_unique_index": "0;0+1;0", "aff_unique_norm": "Princeton University;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.epfl.ch", "aff_unique_abbr": "Princeton;EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "ChannelNets: Compact and Efficient Convolutional Neural Networks via Channel-Wise Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11508", "id": "11508", "author_site": "Hongyang Gao, Zhengyang Wang, Shuiwang Ji", "author": "Hongyang Gao; Zhengyang Wang; Shuiwang Ji", "abstract": "Convolutional neural networks (CNNs) have shown great capability of solving various artificial intelligence tasks. However, the increasing model size has raised challenges in employing them in resource-limited applications. In this work, we propose to compress deep models by using channel-wise convolutions, which replace dense connections among feature maps with sparse ones in CNNs. Based on this novel operation, we build light-weight CNNs known as ChannelNets. ChannelNets use three instances of channel-wise convolutions; namely group channel-wise convolutions, depth-wise separable channel-wise convolutions, and the convolutional classification layer. Compared to prior CNNs designed for mobile devices, ChannelNets achieve a significant reduction in terms of the number of parameters and computational cost without loss in accuracy. Notably, our work represents the first attempt to compress the fully-connected classification layer, which usually accounts for about 25% of total parameters in compact CNNs. Experimental results on the ImageNet dataset demonstrate that ChannelNets achieve consistently better performance compared to prior methods.", "bibtex": "@inproceedings{NEURIPS2018_6a4cbdae,\n author = {Gao, Hongyang and Wang, Zhengyang and Ji, Shuiwang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {ChannelNets: Compact and Efficient Convolutional Neural Networks via Channel-Wise Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a4cbdaedcbda0fa8ddc7ea32073c475-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a4cbdaedcbda0fa8ddc7ea32073c475-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6a4cbdaedcbda0fa8ddc7ea32073c475-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a4cbdaedcbda0fa8ddc7ea32073c475-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a4cbdaedcbda0fa8ddc7ea32073c475-Reviews.html", "metareview": "", "pdf_size": 337071, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Texas A&M University; Texas A&M University; Texas A&M University", "aff_domain": "tamu.edu;tamu.edu;tamu.edu", "email": "tamu.edu;tamu.edu;tamu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a4cbdaedcbda0fa8ddc7ea32073c475-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Clebsch\u2013Gordan Nets: a Fully Fourier Space Spherical Convolutional Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11959", "id": "11959", "author_site": "Risi Kondor, Zhen Lin, Shubhendu Trivedi", "author": "Risi Kondor; Zhen Lin; Shubhendu Trivedi", "abstract": "Recent work by Cohen et al. has achieved state-of-the-art results for learning spherical images in a rotation invariant way by using ideas from group representation theory and noncommutative harmonic analysis. In this paper we propose a generalization of this work that generally exhibits improved performance, but from an implementation point of view is actually simpler. An unusual feature of the proposed architecture is that it uses the Clebsch--Gordan transform as its only source of nonlinearity, thus avoiding repeated forward and backward Fourier transforms. The underlying ideas of the paper generalize to constructing neural networks that are invariant to the action of other compact groups.", "bibtex": "@inproceedings{NEURIPS2018_a3fc981a,\n author = {Kondor, Risi and Lin, Zhen and Trivedi, Shubhendu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Clebsch\\textendash Gordan Nets: a Fully Fourier Space Spherical Convolutional Neural Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a3fc981af450752046be179185ebc8b5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a3fc981af450752046be179185ebc8b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a3fc981af450752046be179185ebc8b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a3fc981af450752046be179185ebc8b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a3fc981af450752046be179185ebc8b5-Reviews.html", "metareview": "", "pdf_size": 304372, "gs_citation": 327, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11612016599464525011&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The University of Chicago; The University of Chicago; Toyota Technological Institute", "aff_domain": "uchicago.edu;uchicago.edu;ttic.edu", "email": "uchicago.edu;uchicago.edu;ttic.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a3fc981af450752046be179185ebc8b5-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Chicago;Toyota Technological Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.tti.ac.jp", "aff_unique_abbr": "UChicago;TTI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Japan" }, { "title": "Cluster Variational Approximations for Structure Learning of Continuous-Time Bayesian Networks from Incomplete Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11756", "id": "11756", "author_site": "Dominik Linzner, Heinz Koeppl", "author": "Dominik Linzner; Heinz Koeppl", "abstract": "Continuous-time Bayesian networks (CTBNs) constitute a general and powerful framework for modeling continuous-time stochastic processes on networks. This makes them particularly attractive for learning the directed structures among interacting entities. However, if the available data is incomplete, one needs to simulate the prohibitively complex CTBN dynamics. Existing approximation techniques, such as sampling and low-order variational methods, either scale unfavorably in system size, or are unsatisfactory in terms of accuracy. Inspired by recent advances in statistical physics, we present a new approximation scheme based on cluster-variational methods that significantly improves upon existing variational approximations. We can analytically marginalize the parameters of the approximate CTBN, as these are of secondary importance for structure learning. This recovers a scalable scheme for direct structure learning from incomplete and noisy time-series data. Our approach outperforms existing methods in terms of scalability.", "bibtex": "@inproceedings{NEURIPS2018_b607aa5b,\n author = {Linzner, Dominik and Koeppl, Heinz},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cluster Variational Approximations for Structure Learning of Continuous-Time Bayesian Networks from Incomplete Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b607aa5b2fd58dd860bfb55619389982-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b607aa5b2fd58dd860bfb55619389982-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b607aa5b2fd58dd860bfb55619389982-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b607aa5b2fd58dd860bfb55619389982-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b607aa5b2fd58dd860bfb55619389982-Reviews.html", "metareview": "", "pdf_size": 2486137, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14469102295798471492&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering and Information Technology; Department of Electrical Engineering and Information Technology + Department of Biology", "aff_domain": "bcs.tu-darmstadt.de;bcs.tu-darmstadt.de", "email": "bcs.tu-darmstadt.de;bcs.tu-darmstadt.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b607aa5b2fd58dd860bfb55619389982-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Unknown University;Institution not specified", "aff_unique_dep": "Department of Electrical Engineering and Information Technology;Department of Biology", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Unknown;" }, { "title": "Clustering Redemption\u2013Beyond the Impossibility of Kleinberg\u2019s Axioms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11814", "id": "11814", "author_site": "Vincent Cohen-Addad, Varun Kanade, Frederik Mallmann-Trenn", "author": "Vincent Cohen-Addad; Varun Kanade; Frederik Mallmann-Trenn", "abstract": "Kleinberg (2002) stated three axioms that any clustering procedure should satisfy and showed there is no clustering procedure that simultaneously satisfies all three. One of these, called the consistency axiom, requires that when the data is modified in a helpful way, i.e. if points in the same cluster are made more similar and those in different ones made less similar, the algorithm should output the same clustering. To circumvent this impossibility result, research has focused on considering clustering procedures that have a clustering quality measure (or a cost) and showing that a modification of Kleinberg\u2019s axioms that takes cost into account leads to feasible clustering procedures. In this work, we take a different approach, based on the observation that the consistency axiom fails to be satisfied when the \u201ccorrect\u201d number of clusters changes. We modify this axiom by making use of cost functions to determine the correct number of clusters, and require that consistency holds only if the number of clusters remains unchanged. 
We show that single linkage satisfies the modified axioms, and if the input is well-clusterable, some popular procedures such as k-means also satisfy the axioms, taking a step towards explaining the success of these objective functions for guiding the design of algorithms.", "bibtex": "@inproceedings{NEURIPS2018_6fbd841e,\n author = {Cohen-Addad, Vincent and Kanade, Varun and Mallmann-Trenn, Frederik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Clustering Redemption\\textendash Beyond the Impossibility of Kleinberg\u2019s Axioms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6fbd841e2e4b2938351a4f9b68f12e6b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6fbd841e2e4b2938351a4f9b68f12e6b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6fbd841e2e4b2938351a4f9b68f12e6b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6fbd841e2e4b2938351a4f9b68f12e6b-Reviews.html", "metareview": "", "pdf_size": 480569, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17204060849133079509&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Sorbonne Universit\u00e9s, UPMC Univ Paris 06, CNRS, LIP6; University of Oxford; MIT", "aff_domain": "lip6.fr;cs.ox.ac.uk;mit.edu", "email": "lip6.fr;cs.ox.ac.uk;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6fbd841e2e4b2938351a4f9b68f12e6b-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Sorbonne Universit\u00e9s;University of Oxford;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.ox.ac.uk;https://web.mit.edu", "aff_unique_abbr": "Sorbonne;Oxford;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "France;United Kingdom;United States" }, { "title": "Co-regularized Alignment for Unsupervised Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11890", "id": "11890", "author_site": "Abhishek Kumar, Prasanna Sattigeri, Kahini Wadhawan, Leonid Karlinsky, Rogerio Feris, Bill Freeman, Gregory Wornell", "author": "Abhishek Kumar; Prasanna Sattigeri; Kahini Wadhawan; Leonid Karlinsky; Rogerio Feris; Bill Freeman; Gregory Wornell", "abstract": "Deep neural networks, trained with large amounts of labeled data, can fail to\ngeneralize well when tested with examples from a target domain whose distribution differs from the training data distribution, referred to as the source domain. It can be expensive or even infeasible to obtain the required amount of labeled data in all possible domains. Unsupervised domain adaptation sets out to address this problem, aiming to learn a good predictive model for the target domain using labeled examples from the source domain but only unlabeled examples from the target domain. \nDomain alignment approaches this problem by matching the source and target feature distributions, and has been used as a key component in many state-of-the-art domain adaptation methods. 
However, matching the marginal feature distributions does not guarantee that the corresponding class conditional distributions will be aligned across the two domains. We propose co-regularized domain alignment for unsupervised domain adaptation, which constructs multiple diverse feature spaces and aligns source and target distributions in each of them individually, while encouraging the alignments to agree with each other with regard to the class predictions on the unlabeled target examples.\nThe proposed method is generic and can be used to improve any domain adaptation method which uses domain alignment. We instantiate it in the context of a recent state-of-the-art method and \nobserve that it provides significant performance improvements on several domain adaptation benchmarks.", "bibtex": "@inproceedings{NEURIPS2018_99607461,\n author = {Kumar, Abhishek and Sattigeri, Prasanna and Wadhawan, Kahini and Karlinsky, Leonid and Feris, Rogerio and Freeman, Bill and Wornell, Gregory},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Co-regularized Alignment for Unsupervised Domain Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/99607461cdb9c26e2bd5f31b12dcf27a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/99607461cdb9c26e2bd5f31b12dcf27a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/99607461cdb9c26e2bd5f31b12dcf27a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/99607461cdb9c26e2bd5f31b12dcf27a-Reviews.html", "metareview": "", "pdf_size": 617728, "gs_citation": 215, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14261001255030725527&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "MIT-IBM Watson AI Lab, IBM Research; MIT-IBM Watson AI Lab, IBM Research; MIT-IBM Watson AI Lab, IBM Research; MIT-IBM Watson AI Lab, IBM Research; MIT-IBM Watson AI Lab, IBM Research; MIT; MIT", "aff_domain": "us.ibm.com;us.ibm.com;ibm.com;il.ibm.com;us.ibm.com;mit.edu;mit.edu", "email": "us.ibm.com;us.ibm.com;ibm.com;il.ibm.com;us.ibm.com;mit.edu;mit.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/99607461cdb9c26e2bd5f31b12dcf27a-Abstract.html", "aff_unique_index": "0;0;0;0;0;1;1", "aff_unique_norm": "IBM;Massachusetts Institute of Technology", "aff_unique_dep": "AI Lab;", "aff_unique_url": "https://www.ibmwatsonai.org/;https://web.mit.edu", "aff_unique_abbr": "MIT-IBM AI Lab;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Co-teaching: Robust training of deep neural networks with extremely noisy labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11815", "id": "11815", "author_site": "Bo Han, Quanming Yao, Xingrui Yu, Gang Niu, Miao Xu, Weihua Hu, Ivor Tsang, Masashi Sugiyama", "author": "Bo Han; Quanming Yao; Xingrui Yu; Gang Niu; Miao Xu; Weihua Hu; Ivor Tsang; Masashi Sugiyama", "abstract": "Deep learning with noisy labels is practically challenging, as the capacity of deep models is so high that they can totally memorize these noisy labels sooner or later during training. 
Nonetheless, recent studies on the memorization effects of deep neural networks show that they would first memorize training data of clean labels and then those of noisy labels. Therefore, in this paper, we propose a new deep learning paradigm called ''Co-teaching'' for combating noisy labels. Namely, we train two deep neural networks simultaneously, and let them teach each other given every mini-batch: firstly, each network feeds forward all data and selects some data of possibly clean labels; secondly, two networks communicate with each other what data in this mini-batch should be used for training; finally, each network backpropagates the data selected by its peer network and updates itself. Empirical results on noisy versions of MNIST, CIFAR-10 and CIFAR-100 demonstrate that Co-teaching is much superior to the state-of-the-art methods in the robustness of trained deep models.", "bibtex": "@inproceedings{NEURIPS2018_a19744e2,\n author = {Han, Bo and Yao, Quanming and Yu, Xingrui and Niu, Gang and Xu, Miao and Hu, Weihua and Tsang, Ivor and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Co-teaching: Robust training of deep neural networks with extremely noisy labels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a19744e268754fb0148b017647355b7b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a19744e268754fb0148b017647355b7b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a19744e268754fb0148b017647355b7b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a19744e268754fb0148b017647355b7b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a19744e268754fb0148b017647355b7b-Reviews.html", "metareview": "", "pdf_size": 1145213, "gs_citation": 2664, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1619874673011079691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Centre for Artificial Intelligence, University of Technology Sydney + RIKEN; RIKEN + Paradigm Inc.; Centre for Artificial Intelligence, University of Technology Sydney; RIKEN; RIKEN; Stanford University; Centre for Artificial Intelligence, University of Technology Sydney; RIKEN + University of Tokyo", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "https://github.com/bhanML/Co-teaching", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a19744e268754fb0148b017647355b7b-Abstract.html", "aff_unique_index": "0+1;1+2;0;1;1;3;0;1+4", "aff_unique_norm": "University of Technology Sydney;RIKEN;Paradigm Inc.;Stanford University;University of Tokyo", "aff_unique_dep": "Centre for Artificial Intelligence;;;;", "aff_unique_url": "https://www.uts.edu.au;https://www.riken.jp;;https://www.stanford.edu;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTS;RIKEN;;Stanford;UTokyo", "aff_campus_unique_index": "0;;0;2;0;", "aff_campus_unique": "Sydney;;Stanford", "aff_country_unique_index": "0+1;1+2;0;1;1;2;0;1+1", "aff_country_unique": "Australia;Japan;United States" }, { "title": "Collaborative Learning for Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11196", "id": "11196", "author_site": "Guocong Song, Wei Chai", "author": "Guocong Song; Wei 
Chai", "abstract": "We introduce collaborative learning in which multiple classifier heads of the same network are simultaneously trained on the same training data to improve generalization and robustness to label noise with no extra inference cost. It acquires the strengths from auxiliary training, multi-task learning and knowledge distillation. There are two important mechanisms involved in collaborative learning. First, the consensus of multiple views from different classifier heads on the same example provides supplementary information as well as regularization to each classifier, thereby improving generalization. Second, intermediate-level representation (ILR) sharing with backpropagation rescaling aggregates the gradient flows from all heads, which not only reduces training computational complexity, but also facilitates supervision to the shared layers. The empirical results on CIFAR and ImageNet datasets demonstrate that deep neural networks learned as a group in a collaborative way significantly reduce the generalization error and increase the robustness to label noise.", "bibtex": "@inproceedings{NEURIPS2018_430c3626,\n author = {Song, Guocong and Chai, Wei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Collaborative Learning for Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/430c3626b879b4005d41b8a46172e0c0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/430c3626b879b4005d41b8a46172e0c0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/430c3626b879b4005d41b8a46172e0c0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/430c3626b879b4005d41b8a46172e0c0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/430c3626b879b4005d41b8a46172e0c0-Reviews.html", "metareview": "", "pdf_size": 473009, "gs_citation": 250, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17853910683874825851&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Playground Global, Palo Alto, CA 94306; Google, Mountain View, CA 94043", "aff_domain": "gmail.com;google.com", "email": "gmail.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/430c3626b879b4005d41b8a46172e0c0-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Playground Global;Google", "aff_unique_dep": ";Google", "aff_unique_url": ";https://www.google.com", "aff_unique_abbr": ";Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Palo Alto;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Combinatorial Optimization with Graph Convolutional Networks and Guided Tree Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11077", "id": "11077", "author_site": "Zhuwen Li, Qifeng Chen, Vladlen Koltun", "author": "Zhuwen Li; Qifeng Chen; Vladlen Koltun", "abstract": "We present a learning-based approach to computing solutions for certain NP-hard problems. Our approach combines deep learning techniques with useful algorithmic elements from classic heuristics. 
The central component is a graph convolutional network that is trained to estimate the likelihood, for each vertex in a graph, of whether this vertex is part of the optimal solution. The network is designed and trained to synthesize a diverse set of solutions, which enables rapid exploration of the solution space via tree search. The presented approach is evaluated on four canonical NP-hard problems and five datasets, which include benchmark satisfiability problems and real social network graphs with up to a hundred thousand nodes. Experimental results demonstrate that the presented approach substantially outperforms recent deep learning work, and performs on par with highly optimized state-of-the-art heuristic solvers for some NP-hard problems. Experiments indicate that our approach generalizes across datasets, and scales to graphs that are orders of magnitude larger than those used during training.", "bibtex": "@inproceedings{NEURIPS2018_8d3bba74,\n author = {Li, Zhuwen and Chen, Qifeng and Koltun, Vladlen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combinatorial Optimization with Graph Convolutional Networks and Guided Tree Search},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8d3bba7425e7c98c50f52ca1b52d3735-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8d3bba7425e7c98c50f52ca1b52d3735-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8d3bba7425e7c98c50f52ca1b52d3735-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8d3bba7425e7c98c50f52ca1b52d3735-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8d3bba7425e7c98c50f52ca1b52d3735-Reviews.html", "metareview": "", "pdf_size": 549929, "gs_citation": 637, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17227236859327607760&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8d3bba7425e7c98c50f52ca1b52d3735-Abstract.html" }, { "title": "Communication Compression for Decentralized Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11735", "id": "11735", "author_site": "Hanlin Tang, Shaoduo Gan, Ce Zhang, Tong Zhang, Ji Liu", "author": "Hanlin Tang; Shaoduo Gan; Ce Zhang; Tong Zhang; Ji Liu", "abstract": "Optimizing distributed learning systems is an art\nof balancing between computation and communication.\nThere have been two lines of research that try to\ndeal with slower networks: {\\em communication \ncompression} for\nlow bandwidth networks, and {\\em decentralization} for\nhigh latency networks. 
In this paper, we explore\na natural question: {\em can the combination\nof both techniques lead to\na system that is robust to both bandwidth\nand latency?}\n\nAlthough the system implication of such a combination\nis trivial, the underlying theoretical principle and\nalgorithm design are challenging: unlike centralized algorithms, simply compressing\nexchanged information,\neven in an unbiased stochastic way, \nwithin the decentralized network would accumulate the error and cause divergence. \nIn this paper, we develop\na framework of quantized, decentralized training and\npropose two different strategies, which we call\n{\em extrapolation compression} and {\em difference compression}.\nWe analyze both algorithms and prove \nboth converge at the rate of $O(1/\sqrt{nT})$ \nwhere $n$ is the number of workers and $T$ is the\nnumber of iterations, matching the convergence rate for\nfull precision, centralized training. We validate \nour algorithms and find that our proposed algorithm outperforms\nthe best of the merely decentralized and merely quantized\nalgorithms significantly for networks with {\em both} \nhigh latency and low bandwidth.", "bibtex": "@inproceedings{NEURIPS2018_44feb009,\n author = {Tang, Hanlin and Gan, Shaoduo and Zhang, Ce and Zhang, Tong and Liu, Ji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Communication Compression for Decentralized Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/44feb0096faa8326192570788b38c1d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/44feb0096faa8326192570788b38c1d1-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/44feb0096faa8326192570788b38c1d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/44feb0096faa8326192570788b38c1d1-Reviews.html", "metareview": "", "pdf_size": 617270, "gs_citation": 332, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=851985872121593149&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, University of Rochester; Department of Computer Science, ETH Zurich; Tencent AI Lab; Tencent AI Lab; Department of Computer Science, University of Rochester + Tencent AI Lab", "aff_domain": "ur.rochester.edu;inf.ethz.ch;inf.ethz.ch;tongzhang-ml.org;gmail.com", "email": "ur.rochester.edu;inf.ethz.ch;inf.ethz.ch;tongzhang-ml.org;gmail.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/44feb0096faa8326192570788b38c1d1-Abstract.html", "aff_unique_index": "0;1;2;2;0+2", "aff_unique_norm": "University of Rochester;ETH Zurich;Tencent", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;Tencent AI Lab", "aff_unique_url": "https://www.rochester.edu;https://www.ethz.ch;https://ai.tencent.com", "aff_unique_abbr": "U of R;ETHZ;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0+2", "aff_country_unique": "United States;Switzerland;China" }, { "title": "Communication Efficient Parallel Algorithms for Optimization on Manifolds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11358", "id": "11358", "author_site": "Bayan Saparbayeva, Michael Zhang, 
Lizhen Lin", "author": "Bayan Saparbayeva; Michael Zhang; Lizhen Lin", "abstract": "The last decade has witnessed an explosion in the development of models, theory and computational algorithms for ``big data'' analysis. In particular, distributed inference has served as a natural and dominating paradigm for statistical inference. However, the existing literature on parallel inference almost exclusively focuses on Euclidean data and parameters. While this assumption is valid for many applications, it is increasingly more common to encounter problems where the data or the parameters lie on a non-Euclidean space, like a manifold for example. Our work aims to fill a critical gap in the literature by generalizing parallel inference algorithms to optimization on manifolds. We show that our proposed algorithm is both communication efficient and carries theoretical convergence guarantees. In addition, we demonstrate the performance of our algorithm to the estimation of Fr\\'echet means on simulated spherical data and the low-rank matrix completion problem over Grassmann manifolds applied to the Netflix prize data set.", "bibtex": "@inproceedings{NEURIPS2018_f4a4da9a,\n author = {Saparbayeva, Bayan and Zhang, Michael and Lin, Lizhen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Communication Efficient Parallel Algorithms for Optimization on Manifolds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f4a4da9aa7eadfd23c7bdb7cf57b3112-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f4a4da9aa7eadfd23c7bdb7cf57b3112-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f4a4da9aa7eadfd23c7bdb7cf57b3112-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f4a4da9aa7eadfd23c7bdb7cf57b3112-Reviews.html", "metareview": "", "pdf_size": 273622, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17982007147562340725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Applied and Computational Mathematics and Statistics, University of Notre Dame; Department of Computer Science, Princeton University; Department of Applied and Computational Mathematics and Statistics, University of Notre Dame", "aff_domain": "nd.edu;cs.princeton.edu;nd.edu", "email": "nd.edu;cs.princeton.edu;nd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f4a4da9aa7eadfd23c7bdb7cf57b3112-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Notre Dame;Princeton University", "aff_unique_dep": "Department of Applied and Computational Mathematics and Statistics;Department of Computer Science", "aff_unique_url": "https://www.nd.edu;https://www.princeton.edu", "aff_unique_abbr": "Notre Dame;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Notre Dame;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Community Exploration: From Offline Optimization to Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11535", "id": "11535", "author_site": "Xiaowei Chen, Weiran Huang, Wei Chen, John C. S. Lui", "author": "Xiaowei Chen; Weiran Huang; Wei Chen; John C. S. 
Lui", "abstract": "We introduce the community exploration problem that has various real-world applications such as online advertising. In the problem, an explorer allocates limited budget to explore communities so as to maximize the number of members he could meet. We provide a systematic study of the community exploration problem, from offline optimization to online learning. For the offline setting where the sizes of communities are known, we prove that the greedy methods for both of non-adaptive exploration and adaptive exploration are optimal. For the online setting where the sizes of communities are not known and need to be learned from the multi-round explorations, we propose an ``upper confidence'' like algorithm that achieves the logarithmic regret bounds. By combining the feedback from different rounds, we can achieve a constant regret bound.", "bibtex": "@inproceedings{NEURIPS2018_c60d870e,\n author = {Chen, Xiaowei and Huang, Weiran and Chen, Wei and Lui, John C. S.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Community Exploration: From Offline Optimization to Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c60d870eaad6a3946ab3e8734466e532-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c60d870eaad6a3946ab3e8734466e532-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c60d870eaad6a3946ab3e8734466e532-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c60d870eaad6a3946ab3e8734466e532-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c60d870eaad6a3946ab3e8734466e532-Reviews.html", "metareview": "", "pdf_size": 444543, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3138770917072498202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "The Chinese University of Hong Kong; Huawei Noah\u2019s Ark Lab + Microsoft Research; Microsoft Research; The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;outlook.com;microsoft.com", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;outlook.com;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c60d870eaad6a3946ab3e8734466e532-Abstract.html", "aff_unique_index": "0;1+2;2;0", "aff_unique_norm": "Chinese University of Hong Kong;Huawei;Microsoft", "aff_unique_dep": ";Noah\u2019s Ark Lab;Microsoft Research", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CUHK;Huawei;MSR", "aff_campus_unique_index": "0;;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0+1;1;0", "aff_country_unique": "China;United States" }, { "title": "Compact Generalized Non-local Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11629", "id": "11629", "author_site": "Kaiyu Yue, Ming Sun, Yuchen Yuan, Feng Zhou, Errui Ding, Fuxin Xu", "author": "Kaiyu Yue; Ming Sun; Yuchen Yuan; Feng Zhou; Errui Ding; Fuxin Xu", "abstract": "The non-local module is designed for capturing long-range spatio-temporal dependencies in images and videos. 
Although it has shown excellent performance, it lacks a mechanism to model the interactions between positions across channels, which are of vital importance in recognizing fine-grained objects and actions. To address this limitation, we generalize the non-local module and take the correlations between the positions of any two channels into account. This extension utilizes a compact representation of multiple kernel functions via Taylor expansion, which gives the generalized non-local module a fast and low-complexity computation flow. Moreover, we implement our generalized non-local method within channel groups to ease the optimization. Experimental results illustrate the clear-cut improvements and practical applicability of the generalized non-local module on both fine-grained object recognition and video classification. Code is available at: https://github.com/KaiyuYue/cgnl-network.pytorch.", "bibtex": "@inproceedings{NEURIPS2018_907edb0a,\n author = {Yue, Kaiyu and Sun, Ming and Yuan, Yuchen and Zhou, Feng and Ding, Errui and Xu, Fuxin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Compact Generalized Non-local Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/907edb0aa6986220dbffb79a788596ee-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/907edb0aa6986220dbffb79a788596ee-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/907edb0aa6986220dbffb79a788596ee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/907edb0aa6986220dbffb79a788596ee-Reviews.html", "metareview": "", "pdf_size": 4486482, "gs_citation": 218, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12004705320658184806&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Baidu VIS; Baidu VIS; Baidu VIS; Baidu Research; Baidu VIS; Central South University", "aff_domain": "baidu.com;baidu.com;baidu.com;baidu.com;baidu.com;csu.edu.cn", "email": "baidu.com;baidu.com;baidu.com;baidu.com;baidu.com;csu.edu.cn", "github": "https://github.com/KaiyuYue/cgnl-network.pytorch", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/907edb0aa6986220dbffb79a788596ee-Abstract.html", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Baidu;Central South University", "aff_unique_dep": "Baidu Visualization;", "aff_unique_url": "https://www.baidu.com;https://www.csu.edu.cn", "aff_unique_abbr": "Baidu;CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Compact Representation of Uncertainty in Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11824", "id": "11824", "author_site": "Craig Greenberg, Nicholas Monath, Ari Kobren, Patrick Flaherty, Andrew McGregor, Andrew McCallum", "author": "Craig Greenberg; Nicholas Monath; Ari Kobren; Patrick Flaherty; Andrew McGregor; Andrew McCallum", "abstract": "For many classic structured prediction problems, probability distributions over the dependent variables can be efficiently computed using widely-known algorithms and data structures (such as forward-backward, and its corresponding trellis for exact probability distributions in Markov models). 
However, we know of no previous work studying efficient representations of exact distributions over clusterings. This paper presents definitions and proofs for a dynamic-programming inference procedure that computes the partition function, the marginal probability of a cluster, and the MAP clustering---all exactly. Rather than the Nth Bell number, these exact solutions take time and space proportional to the substantially smaller powerset of N. Indeed, we improve upon the time complexity of the algorithm introduced by Kohonen and Corander (2016) for this problem by a factor of N. While still large, this previously unknown result is intellectually interesting in its own right, makes feasible exact inference for important real-world small data applications (such as medicine), and provides a natural stepping stone towards sparse-trellis approximations that enable further scalability (which we also explore). In experiments, we demonstrate the superiority of our approach over approximate methods in analyzing real-world gene expression data used in cancer treatment.", "bibtex": "@inproceedings{NEURIPS2018_29c4a0e4,\n author = {Greenberg, Craig and Monath, Nicholas and Kobren, Ari and Flaherty, Patrick and McGregor, Andrew and McCallum, Andrew},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Compact Representation of Uncertainty in Clustering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/29c4a0e4ef7d1969a94a5f4aadd20690-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/29c4a0e4ef7d1969a94a5f4aadd20690-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/29c4a0e4ef7d1969a94a5f4aadd20690-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/29c4a0e4ef7d1969a94a5f4aadd20690-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/29c4a0e4ef7d1969a94a5f4aadd20690-Reviews.html", "metareview": "", "pdf_size": 2703301, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1538909383481471007&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst + National Institute of Standards and Technology; College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst; Department of Mathematics and Statistics, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;cs.umass.edu;cs.umass.edu;math.umass.edu;cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu;cs.umass.edu;math.umass.edu;cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/29c4a0e4ef7d1969a94a5f4aadd20690-Abstract.html", "aff_unique_index": "0+1;0;0;0;0;0", "aff_unique_norm": "University of Massachusetts Amherst;National Institute of Standards and Technology", "aff_unique_dep": "College of Information and Computer Sciences;", "aff_unique_url": "https://www.umass.edu;https://www.nist.gov", "aff_unique_abbr": "UMass Amherst;NIST", 
"aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0+0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Completing State Representations using Spectral Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11428", "id": "11428", "author_site": "Nan Jiang, Alex Kulesza, Satinder Singh", "author": "Nan Jiang; Alex Kulesza; Satinder Singh", "abstract": "A central problem in dynamical system modeling is state discovery\u2014that is, finding a compact summary of the past that captures the information needed to predict the future. Predictive State Representations (PSRs) enable clever spectral methods for state discovery; however, while consistent in the limit of infinite data, these methods often suffer from poor performance in the low data regime. In this paper we develop a novel algorithm for incorporating domain knowledge, in the form of an imperfect state representation, as side information to speed spectral learning for PSRs. We prove theoretical results characterizing the relevance of a user-provided state representation, and design spectral algorithms that can take advantage of a relevant representation. Our algorithm utilizes principal angles to extract the relevant components of the representation, and is robust to misspecification. Empirical evaluation on synthetic HMMs, an aircraft identification domain, and a gene splice dataset shows that, even with weak domain knowledge, the algorithm can significantly outperform standard PSR learning.", "bibtex": "@inproceedings{NEURIPS2018_51174add,\n author = {Jiang, Nan and Kulesza, Alex and Singh, Satinder},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Completing State Representations using Spectral Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/51174add1c52758f33d414ceaf3fe6ba-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/51174add1c52758f33d414ceaf3fe6ba-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/51174add1c52758f33d414ceaf3fe6ba-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/51174add1c52758f33d414ceaf3fe6ba-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/51174add1c52758f33d414ceaf3fe6ba-Reviews.html", "metareview": "", "pdf_size": 582270, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1417771810272957724&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "UIUC; Google Research; University of Michigan", "aff_domain": "illinois.edu;google.com;umich.edu", "email": "illinois.edu;google.com;umich.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/51174add1c52758f33d414ceaf3fe6ba-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;University of Michigan", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.illinois.edu;https://research.google;https://www.umich.edu", "aff_unique_abbr": "UIUC;Google Research;UM", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Complex Gated Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11996", "id": "11996", "author_site": "Moritz Wolter, Angela Yao", "author": "Moritz Wolter; Angela Yao", "abstract": "Complex numbers have long been favoured for digital signal processing, yet\ncomplex representations rarely appear in deep learning architectures. RNNs, widely\nused to process time series and sequence information, could greatly benefit from\ncomplex representations. We present a novel complex gated recurrent cell, which\nis a hybrid cell combining complex-valued and norm-preserving state transitions\nwith a gating mechanism. The resulting RNN exhibits excellent stability and\nconvergence properties and performs competitively on the synthetic memory and\nadding tasks, as well as on the real-world task of human motion prediction.", "bibtex": "@inproceedings{NEURIPS2018_652cf383,\n author = {Wolter, Moritz and Yao, Angela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Complex Gated Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/652cf38361a209088302ba2b8b7f51e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/652cf38361a209088302ba2b8b7f51e0-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/652cf38361a209088302ba2b8b7f51e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/652cf38361a209088302ba2b8b7f51e0-Reviews.html", "metareview": "", "pdf_size": 572769, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10862653902258650151&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Institute for Computer Science, University of Bonn; School of Computing, National University of Singapore", "aff_domain": "cs.uni-bonn.de;comp.nus.edu.sg", "email": "cs.uni-bonn.de;comp.nus.edu.sg", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/652cf38361a209088302ba2b8b7f51e0-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Bonn;National University of Singapore", "aff_unique_dep": "Institute for Computer Science;School of Computing", "aff_unique_url": "https://www.uni-bonn.de;https://www.nus.edu.sg", "aff_unique_abbr": ";NUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Singapore", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Singapore" }, { "title": "Computationally and statistically efficient learning of causal Bayes nets using path queries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12033", "id": "12033", "author_site": "Kevin Bello, Jean Honorio", "author": "Kevin Bello; Jean Honorio", "abstract": "Causal discovery from empirical data is a fundamental problem in many scientific domains. Observational data allows for identifiability only up to a Markov equivalence class. In this paper we first propose a polynomial time algorithm for learning the exact correctly-oriented structure of the transitive reduction of any causal Bayesian network with high probability, by using interventional path queries. Each path query takes as input an origin node and a target node, and answers whether there is a directed path from the origin to the target. This is done by intervening on the origin node and observing samples from the target node. We theoretically show the logarithmic sample complexity for the size of interventional data per path query, for continuous and discrete networks. We then show how to learn the transitive edges using also logarithmic sample complexity (albeit in time exponential in the maximum number of parents for discrete networks), which allows us to learn the full network. We further extend our work by reducing the number of interventional path queries for learning rooted trees. We also provide an analysis of imperfect interventions.", "bibtex": "@inproceedings{NEURIPS2018_a0b45d1b,\n author = {Bello, Kevin and Honorio, Jean},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Computationally and statistically efficient learning of causal Bayes nets using path queries},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a0b45d1bb84fe1bedbb8449764c4d5d5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a0b45d1bb84fe1bedbb8449764c4d5d5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a0b45d1bb84fe1bedbb8449764c4d5d5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a0b45d1bb84fe1bedbb8449764c4d5d5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a0b45d1bb84fe1bedbb8449764c4d5d5-Reviews.html", "metareview": "", "pdf_size": 464682, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9648481895517855356&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, Purdue University; Department of Computer Science, Purdue University", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a0b45d1bb84fe1bedbb8449764c4d5d5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Computing Higher Order Derivatives of Matrix and Tensor Expressions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11282", "id": "11282", "author_site": "S\u00f6ren Laue, Matthias Mitterreiter, Joachim Giesen", "author": "Soeren Laue; Matthias Mitterreiter; Joachim Giesen", "abstract": "Optimization is an integral part of most machine learning systems and most numerical optimization schemes rely on the computation of derivatives. Therefore, frameworks for computing derivatives are an active area of machine learning research. Surprisingly, as of yet, no existing framework is capable of computing higher order matrix and tensor derivatives directly. Here, we close this fundamental gap and present an algorithmic framework for computing matrix and tensor derivatives that extends seamlessly to higher order derivatives. The framework can be used for symbolic as well as for forward and reverse mode automatic differentiation. Experiments show a speedup between one and four orders of magnitude over state-of-the-art frameworks when evaluating higher order derivatives.", "bibtex": "@inproceedings{NEURIPS2018_0a1bf96b,\n author = {Laue, Soeren and Mitterreiter, Matthias and Giesen, Joachim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Computing Higher Order Derivatives of Matrix and Tensor Expressions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0a1bf96b7165e962e90cb14648c9462d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0a1bf96b7165e962e90cb14648c9462d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0a1bf96b7165e962e90cb14648c9462d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0a1bf96b7165e962e90cb14648c9462d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0a1bf96b7165e962e90cb14648c9462d-Reviews.html", "metareview": "", "pdf_size": 504793, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=701438258939359150&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Friedrich-Schiller-Universit\u00e4t Jena; Friedrich-Schiller-Universit\u00e4t Jena; Friedrich-Schiller-Universit\u00e4t Jena", "aff_domain": "uni-jena.de;uni-jena.de;uni-jena.de", "email": "uni-jena.de;uni-jena.de;uni-jena.de", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0a1bf96b7165e962e90cb14648c9462d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Friedrich-Schiller-Universit\u00e4t", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-jena.de", "aff_unique_abbr": "FSU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Jena", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Computing Kantorovich-Wasserstein Distances on $d$-dimensional histograms using $(d+1)$-partite graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11564", "id": "11564", "author_site": "Gennaro Auricchio, Federico Bassetti, Stefano Gualandi, Marco Veneroni", "author": "Gennaro Auricchio; Federico Bassetti; Stefano Gualandi; Marco Veneroni", "abstract": "This paper presents a novel method to compute the exact Kantorovich-Wasserstein distance between a pair of $d$-dimensional histograms having $n$ bins each. We prove that this problem is equivalent to an uncapacitated minimum cost flow problem on a $(d+1)$-partite graph with $(d+1)n$ nodes and $dn^{\\frac{d+1}{d}}$ arcs, whenever the cost is separable along the principal $d$-dimensional directions. We show numerically the benefits of our approach by computing the Kantorovich-Wasserstein distance of order 2 among two sets of instances: gray scale images and $d$-dimensional biomedical histograms. On these types of instances, our approach is competitive with state-of-the-art optimal transport algorithms.", "bibtex": "@inproceedings{NEURIPS2018_b19aa25f,\n author = {Auricchio, Gennaro and Bassetti, Federico and Gualandi, Stefano and Veneroni, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Computing Kantorovich-Wasserstein Distances on d-dimensional histograms using (d+1)-partite graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b19aa25ff58940d974234b48391b9549-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b19aa25ff58940d974234b48391b9549-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b19aa25ff58940d974234b48391b9549-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b19aa25ff58940d974234b48391b9549-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b19aa25ff58940d974234b48391b9549-Reviews.html", "metareview": "", "pdf_size": 4249211, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11692948010277305024&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Universit\u00e0 degli Studi di Pavia, Dipartimento di Matematica \u201cF. Casorati\u201d; Universit\u00e0 degli Studi di Pavia, Dipartimento di Matematica \u201cF. Casorati\u201d; Universit\u00e0 degli Studi di Pavia, Dipartimento di Matematica \u201cF. Casorati\u201d; Politecnico di Milano, Dipartimento di Matematica", "aff_domain": "universitadipavia.it;unipv.it;unipv.it;polimi.it", "email": "universitadipavia.it;unipv.it;unipv.it;polimi.it", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b19aa25ff58940d974234b48391b9549-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Universit\u00e0 degli Studi di Pavia;Politecnico di Milano", "aff_unique_dep": "Dipartimento di Matematica \u201cF. Casorati\u201d;Dipartimento di Matematica", "aff_unique_url": "https://www.unipv.eu;https://www.polimi.it", "aff_unique_abbr": ";Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "title": "Conditional Adversarial Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11178", "id": "11178", "author_site": "Mingsheng Long, ZHANGJIE CAO, Jianmin Wang, Michael Jordan", "author": "Mingsheng Long; ZHANGJIE CAO; Jianmin Wang; Michael I Jordan", "abstract": "Adversarial learning has been embedded into deep networks to learn disentangled and transferable representations for domain adaptation. Existing adversarial domain adaptation methods may struggle to align different domains of multimodal distributions that are native in classification problems. In this paper, we present conditional adversarial domain adaptation, a principled framework that conditions the adversarial adaptation models on discriminative information conveyed in the classifier predictions. Conditional domain adversarial networks (CDANs) are designed with two novel conditioning strategies: multilinear conditioning that captures the cross-covariance between feature representations and classifier predictions to improve the discriminability, and entropy conditioning that controls the uncertainty of classifier predictions to guarantee the transferability. Experiments testify that the proposed approach exceeds the state-of-the-art results on five benchmark datasets.", "bibtex": "@inproceedings{NEURIPS2018_ab88b157,\n author = {Long, Mingsheng and CAO, ZHANGJIE and Wang, Jianmin and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Conditional Adversarial Domain Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ab88b15733f543179858600245108dd8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ab88b15733f543179858600245108dd8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ab88b15733f543179858600245108dd8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ab88b15733f543179858600245108dd8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ab88b15733f543179858600245108dd8-Reviews.html", "metareview": "", "pdf_size": 1419669, "gs_citation": 2951, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=951003799487024572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "School of Software, Tsinghua University, China + KLiss, MOE; BNRist; Research Center for Big Data, Tsinghua University, China; University of California, Berkeley, Berkeley, USA", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;gmail.com;berkeley.edu", "email": "tsinghua.edu.cn;tsinghua.edu.cn;gmail.com;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ab88b15733f543179858600245108dd8-Abstract.html", "aff_unique_index": "0+1;2;0;3", "aff_unique_norm": "Tsinghua University;Ministry of Education;BNRist;University of California, Berkeley", "aff_unique_dep": "School of Software;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;;https://www.berkeley.edu", "aff_unique_abbr": "THU;MOE;;UC Berkeley", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0+1;0;3", "aff_country_unique": "China;Unknown;;United States" }, { "title": "Confounding-Robust Policy Improvement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11883", "id": "11883", "author_site": "Nathan Kallus, Angela Zhou", "author": "Nathan Kallus; Angela Zhou", "abstract": "We study the problem of learning personalized decision policies from observational data while accounting for possible unobserved confounding in the data-generating process. Unlike previous approaches that assume unconfoundedness, i.e., no unobserved confounders affected both treatment assignment and outcomes, we calibrate policy learning for realistic violations of this unverifiable assumption with uncertainty sets motivated by sensitivity analysis in causal inference. Our framework for confounding-robust policy improvement optimizes the minimax regret of a candidate policy against a baseline or reference \"status quo\" policy, over an uncertainty set around nominal propensity weights. We prove that if the uncertainty set is well-specified, robust policy learning can do no worse than the baseline, and only improve if the data supports it. We characterize the adversarial subproblem and use efficient algorithmic solutions to optimize over parametrized spaces of decision policies such as logistic treatment assignment. 
We assess our methods on synthetic data and a large clinical trial, demonstrating that confounded selection can hinder policy learning and lead to unwarranted harm, while our robust approach guarantees safety and focuses on well-evidenced improvement.", "bibtex": "@inproceedings{NEURIPS2018_3a09a524,\n author = {Kallus, Nathan and Zhou, Angela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Confounding-Robust Policy Improvement},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3a09a524440d44d7f19870070a5ad42f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3a09a524440d44d7f19870070a5ad42f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3a09a524440d44d7f19870070a5ad42f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3a09a524440d44d7f19870070a5ad42f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3a09a524440d44d7f19870070a5ad42f-Reviews.html", "metareview": "", "pdf_size": 674332, "gs_citation": 207, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10477245195457818902&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Cornell University and Cornell Tech; Cornell University and Cornell Tech", "aff_domain": "cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3a09a524440d44d7f19870070a5ad42f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ithaca", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Connecting Optimization and Regularization Paths", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12003", "id": "12003", "author_site": "Arun Suggala, Adarsh Prasad, Pradeep Ravikumar", "author": "Arun Suggala; Adarsh Prasad; Pradeep K Ravikumar", "abstract": "We study the implicit regularization properties of optimization techniques by explicitly connecting their optimization paths to the regularization paths of ``corresponding'' regularized problems. This surprising connection shows that iterates of optimization techniques such as gradient descent and mirror descent are \\emph{pointwise} close to solutions of appropriately regularized objectives. While such a tight connection between optimization and regularization is of independent intellectual interest, it also has important implications for machine learning: we can port results from regularized estimators to optimization, and vice versa. We investigate one key consequence, that borrows from the well-studied analysis of regularized estimators, to then obtain tight excess risk bounds of the iterates generated by optimization techniques.", "bibtex": "@inproceedings{NEURIPS2018_6459257d,\n author = {Suggala, Arun and Prasad, Adarsh and Ravikumar, Pradeep K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
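To make the minimax step in the Confounding-Robust Policy Improvement entry concrete, the adversarial subproblem can be pictured as a small linear program: inverse-propensity weights are only known up to an uncertainty box, and an adversary picks the weighting that makes the candidate policy look worst. A toy sketch under simplified assumptions (a box around nominal weights with a fixed total is a stand-in for, not a copy of, the paper's uncertainty sets):

```python
import numpy as np
from scipy.optimize import linprog

def worst_case_value(rewards, nominal_w, gamma):
    """Minimize sum_i w_i * r_i over weights w_i in [w_i/gamma, w_i*gamma]
    with the total weight held at n. Assumes nominal_w sums to n so the
    program is feasible; gamma >= 1 encodes the allowed confounding."""
    n = len(rewards)
    res = linprog(c=rewards,                        # adversary minimizes value
                  A_eq=np.ones((1, n)), b_eq=[n],   # normalization constraint
                  bounds=list(zip(nominal_w / gamma, nominal_w * gamma)))
    return res.fun
```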
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Connecting Optimization and Regularization Paths},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6459257ddab7b85bf4b57845e875e4d4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6459257ddab7b85bf4b57845e875e4d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6459257ddab7b85bf4b57845e875e4d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6459257ddab7b85bf4b57845e875e4d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6459257ddab7b85bf4b57845e875e4d4-Reviews.html", "metareview": "", "pdf_size": 692660, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16076061301502344962&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6459257ddab7b85bf4b57845e875e4d4-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Connectionist Temporal Classification with Maximum Entropy Regularization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11105", "id": "11105", "author_site": "Hu Liu, Sheng Jin, Changshui Zhang", "author": "Hu Liu; Sheng Jin; Changshui Zhang", "abstract": "Connectionist Temporal Classification (CTC) is an objective function for end-to-end sequence learning, which adopts dynamic programming algorithms to directly learn the mapping between sequences. CTC has shown promising results in many sequence learning applications including speech recognition and scene text recognition. However, CTC tends to produce highly peaky and overconfident distributions, which is a symptom of overfitting. To remedy this, we propose a regularization method based on maximum conditional entropy which penalizes peaky distributions and encourages exploration. We also introduce an entropy-based pruning method to dramatically reduce the number of CTC feasible paths by ruling out unreasonable alignments. Experiments on scene text recognition show that our proposed methods consistently improve over the CTC baseline without the need to adjust training settings. Code has been made publicly available at: https://github.com/liuhu-bigeye/enctc.crnn.", "bibtex": "@inproceedings{NEURIPS2018_e44fea3b,\n author = {Liu, Hu and Jin, Sheng and Zhang, Changshui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
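A rough illustration of the maximum-entropy idea in the CTC entry above: subtract an entropy bonus from the CTC objective so that peaky, overconfident frame distributions are penalized. Note that the paper regularizes the conditional entropy over feasible CTC paths via dynamic programming; the per-frame entropy used here is a simpler stand-in with the same intent:

```python
import torch.nn.functional as F

def ctc_with_entropy_bonus(log_probs, targets, input_lens, target_lens, beta=0.2):
    """CTC loss minus a per-frame entropy bonus (illustrative substitute for
    the paper's path-entropy regularizer). log_probs: (T, batch, vocab)
    log-softmax outputs, as expected by F.ctc_loss."""
    ctc = F.ctc_loss(log_probs, targets, input_lens, target_lens, blank=0)
    entropy = -(log_probs.exp() * log_probs).sum(-1).mean()
    return ctc - beta * entropy   # higher entropy lowers the loss
```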
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Connectionist Temporal Classification with Maximum Entropy Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e44fea3bec53bcea3b7513ccef5857ac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e44fea3bec53bcea3b7513ccef5857ac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e44fea3bec53bcea3b7513ccef5857ac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e44fea3bec53bcea3b7513ccef5857ac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e44fea3bec53bcea3b7513ccef5857ac-Reviews.html", "metareview": "", "pdf_size": 770523, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16455105685023612483&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Institute for Arti\ufb01cial Intelligence, Tsinghua University (THUAI) + Beijing National Research Center for Information Science and Technology (BNRist) + State Key Lab of Intelligent Technologies and Systems + Department of Automation, Tsinghua University, Beijing, P.R.China; Institute for Arti\ufb01cial Intelligence, Tsinghua University (THUAI) + Beijing National Research Center for Information Science and Technology (BNRist) + State Key Lab of Intelligent Technologies and Systems + Department of Automation, Tsinghua University, Beijing, P.R.China; Institute for Arti\ufb01cial Intelligence, Tsinghua University (THUAI) + Beijing National Research Center for Information Science and Technology (BNRist) + State Key Lab of Intelligent Technologies and Systems + Department of Automation, Tsinghua University, Beijing, P.R.China", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "github": "https://github.com/liuhu-bigeye/enctc.crnn", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e44fea3bec53bcea3b7513ccef5857ac-Abstract.html", "aff_unique_index": "0+1+2+0;0+1+2+0;0+1+2+0", "aff_unique_norm": "Tsinghua University;Beijing National Research Center for Information Science and Technology;State Key Lab of Intelligent Technologies and Systems", "aff_unique_dep": "Institute for Arti\ufb01cial Intelligence;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;", "aff_unique_abbr": "THUAI;BNRist;", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0+0+0+0;0+0+0+0;0+0+0+0", "aff_country_unique": "China" }, { "title": "Constant Regret, Generalized Mixability, and Mirror Descent", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11714", "id": "11714", "author_site": "Zakaria Mhammedi, Robert Williamson", "author": "Zakaria Mhammedi; Robert C. Williamson", "abstract": "We consider the setting of prediction with expert advice; a learner makes predictions by aggregating those of a group of experts. Under this setting, and for the right choice of loss function and ``mixing'' algorithm, it is possible for the learner to achieve a constant regret regardless of the number of prediction rounds. For example, a constant regret can be achieved for \\emph{mixable} losses using the \\emph{aggregating algorithm}. 
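For the mixability entry above, the aggregating algorithm for a mixable loss amounts to exponential weighting of the experts (and, per the abstract, the generalized aggregating algorithm reduces to this when the entropy is Shannon's). An illustrative one-step update:

```python
import numpy as np

def aggregating_update(weights, losses, eta):
    """One exponential-weights step over expert losses: multiply each weight
    by exp(-eta * loss) and renormalize back to the simplex."""
    w = weights * np.exp(-eta * losses)
    return w / w.sum()
```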
The \\emph{Generalized Aggregating Algorithm} (GAA) is a name for a family of algorithms parameterized by convex functions on simplices (entropies), which reduce to the aggregating algorithm when using the \\emph{Shannon entropy} $\\operatorname{S}$. For a given entropy $\\Phi$, losses for which a constant regret is possible using the \\textsc{GAA} are called $\\Phi$-mixable. Which losses are $\\Phi$-mixable was previously left as an open question. We fully characterize $\\Phi$-mixability and answer other open questions posed by \\cite{Reid2015}. We show that the Shannon entropy $\\operatorname{S}$ is fundamental in nature when it comes to mixability; any $\\Phi$-mixable loss is necessarily $\\operatorname{S}$-mixable, and the lowest worst-case regret of the \\textsc{GAA} is achieved using the Shannon entropy. Finally, by leveraging the connection between the \\emph{mirror descent algorithm} and the update step of the GAA, we suggest a new \\emph{adaptive} generalized aggregating algorithm and analyze its performance in terms of the regret bound.", "bibtex": "@inproceedings{NEURIPS2018_af1b5754,\n author = {Mhammedi, Zakaria and Williamson, Robert C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constant Regret, Generalized Mixability, and Mirror Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/af1b5754061ebbd4412adfb34c8d3534-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/af1b5754061ebbd4412adfb34c8d3534-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/af1b5754061ebbd4412adfb34c8d3534-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/af1b5754061ebbd4412adfb34c8d3534-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/af1b5754061ebbd4412adfb34c8d3534-Reviews.html", "metareview": "", "pdf_size": 360264, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9027854558349029872&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Research School of Computer Science, Australian National University and DATA61; Research School of Computer Science, Australian National University and DATA61", "aff_domain": "anu.edu.au;anu.edu.au", "email": "anu.edu.au;anu.edu.au", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/af1b5754061ebbd4412adfb34c8d3534-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "Research School of Computer Science", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Constrained Cross-Entropy Method for Safe Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11717", "id": "11717", "author_site": "Min Wen, Ufuk Topcu", "author": "Min Wen; Ufuk Topcu", "abstract": "We study a safe reinforcement learning problem in which the constraints are defined as the expected cost over finite-length trajectories. We propose a constrained cross-entropy-based method to solve this problem. 
The method explicitly tracks its performance with respect to constraint satisfaction and thus is well-suited for safety-critical applications. We show that the asymptotic behavior of the proposed algorithm can be almost surely described by that of an ordinary differential equation. Then we give sufficient conditions on the properties of this differential equation to guarantee the convergence of the proposed algorithm. Finally, we show through simulation experiments that the proposed algorithm can effectively learn feasible policies without assumptions on the feasibility of initial policies, even with non-Markovian objective functions and constraint functions.", "bibtex": "@inproceedings{NEURIPS2018_34ffeb35,\n author = {Wen, Min and Topcu, Ufuk},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constrained Cross-Entropy Method for Safe Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/34ffeb359a192eb8174b6854643cc046-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/34ffeb359a192eb8174b6854643cc046-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/34ffeb359a192eb8174b6854643cc046-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/34ffeb359a192eb8174b6854643cc046-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/34ffeb359a192eb8174b6854643cc046-Reviews.html", "metareview": "", "pdf_size": 1003747, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3657337875106138059&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Systems Engineering, University of Pennsylvania; Department of Aerospace Engineering and Engineering Mechanics, University of Texas at Austin", "aff_domain": "seas.upenn.edu;utexas.edu", "email": "seas.upenn.edu;utexas.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/34ffeb359a192eb8174b6854643cc046-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Pennsylvania;University of Texas at Austin", "aff_unique_dep": "Department of Electrical and Systems Engineering;Department of Aerospace Engineering and Engineering Mechanics", "aff_unique_url": "https://www.upenn.edu;https://www.utexas.edu", "aff_unique_abbr": "UPenn;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Constrained Generation of Semantically Valid Graphs via Regularizing Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11685", "id": "11685", "author_site": "Tengfei Ma, Jie Chen, Cao Xiao", "author": "Tengfei Ma; Jie Chen; Cao Xiao", "abstract": "Deep generative models have achieved remarkable success in various data domains, including images, time series, and natural languages. There remain, however, substantial challenges for combinatorial structures, including graphs. One of the key challenges lies in the difficulty of ensuring semantic validity in context. 
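The constrained cross-entropy method in the safe-RL entry above can be sketched as ordinary CEM with a feasibility-first elite rule: if enough sampled policies meet the cost constraint, rank the feasible ones by return; otherwise rank all samples by constraint value. A minimal sketch under those assumptions (all names are mine, and objective/constraint stand for Monte Carlo estimates):

```python
import numpy as np

def constrained_cem_step(mean, std, objective, constraint, d=0.0,
                         n_samples=100, n_elite=10, seed=None):
    """One iteration: sample parameters, pick elites feasibility-first,
    then refit the Gaussian sampling distribution to the elite set."""
    rng = np.random.default_rng(seed)
    thetas = rng.normal(mean, std, size=(n_samples, len(mean)))
    f = np.array([objective(t) for t in thetas])    # estimated returns
    g = np.array([constraint(t) for t in thetas])   # estimated expected costs
    feasible = np.flatnonzero(g <= d)
    if len(feasible) >= n_elite:                    # rank feasible by return
        elite_idx = feasible[np.argsort(-f[feasible])][:n_elite]
    else:                                           # rank all by constraint
        elite_idx = np.argsort(g)[:n_elite]
    elite = thetas[elite_idx]
    return elite.mean(axis=0), elite.std(axis=0) + 1e-6
```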
For example, in molecular graphs, the number of bonding-electron pairs must not exceed the valence of an atom; whereas in protein interaction networks, two proteins may be connected only when they belong to the same or correlated gene ontology terms. These constraints are not easy to incorporate into a generative model. In this work, we propose a regularization framework for variational autoencoders as a step toward semantic validity. We focus on the matrix representation of graphs and formulate penalty terms that regularize the output distribution of the decoder to encourage the satisfaction of validity constraints. Experimental results confirm a much higher likelihood of sampling valid graphs in our approach, compared with others reported in the literature.", "bibtex": "@inproceedings{NEURIPS2018_1458e750,\n author = {Ma, Tengfei and Chen, Jie and Xiao, Cao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constrained Generation of Semantically Valid Graphs via Regularizing Variational Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1458e7509aa5f47ecfb92536e7dd1dc7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1458e7509aa5f47ecfb92536e7dd1dc7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1458e7509aa5f47ecfb92536e7dd1dc7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1458e7509aa5f47ecfb92536e7dd1dc7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1458e7509aa5f47ecfb92536e7dd1dc7-Reviews.html", "metareview": "", "pdf_size": 580753, "gs_citation": 283, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8461416587658034730&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "IBM Research; IBM Research; IBM Research", "aff_domain": "ibm.com;us.ibm.com;us.ibm.com", "email": "ibm.com;us.ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1458e7509aa5f47ecfb92536e7dd1dc7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Constrained Graph Variational Autoencoders for Molecule Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11748", "id": "11748", "author_site": "Qi Liu, Miltiadis Allamanis, Marc Brockschmidt, Alexander Gaunt", "author": "Qi Liu; Miltiadis Allamanis; Marc Brockschmidt; Alexander Gaunt", "abstract": "Graphs are ubiquitous data structures for representing interactions between entities. With an emphasis on applications in chemistry, we explore the task of learning to generate graphs that conform to a distribution observed in training data. We propose a variational autoencoder model in which both encoder and decoder are graph-structured. Our decoder assumes a sequential ordering of graph extension steps and we discuss and analyze design choices that mitigate the potential downsides of this linearization. 
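One way to picture the penalty terms in the regularized graph-VAE entry above (Constrained Generation of Semantically Valid Graphs): for molecules, penalize the decoder whenever its expected bond count at an atom exceeds that atom's valence. An illustrative fragment, not the paper's exact formulation:

```python
import torch

def valence_penalty(edge_probs, valence):
    """edge_probs: (n, n) predicted bond probabilities with zero diagonal;
    valence: (n,) per-atom capacity. Penalizes expected degree above valence,
    pushing the decoder's output distribution toward valid graphs."""
    expected_degree = edge_probs.sum(dim=1)
    return torch.relu(expected_degree - valence).sum()
```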
Experiments compare our approach with a wide range of baselines on the molecule generation task and show that our method is successful at matching the statistics of the original dataset on semantically important metrics. Furthermore, we show that by using appropriate shaping of the latent space, our model allows us to design molecules that are (locally) optimal in desired properties.", "bibtex": "@inproceedings{NEURIPS2018_b8a03c5c,\n author = {Liu, Qi and Allamanis, Miltiadis and Brockschmidt, Marc and Gaunt, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constrained Graph Variational Autoencoders for Molecule Design},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b8a03c5c15fcfa8dae0b03351eb1742f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b8a03c5c15fcfa8dae0b03351eb1742f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b8a03c5c15fcfa8dae0b03351eb1742f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b8a03c5c15fcfa8dae0b03351eb1742f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b8a03c5c15fcfa8dae0b03351eb1742f-Reviews.html", "metareview": "", "pdf_size": 1314565, "gs_citation": 617, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2838800553083041205&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Singapore University of Technology and Design; Microsoft Research, Cambridge; Microsoft Research, Cambridge; Microsoft Research, Cambridge", "aff_domain": "u.nus.edu;microsoft.com;microsoft.com;microsoft.com", "email": "u.nus.edu;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b8a03c5c15fcfa8dae0b03351eb1742f-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Singapore University of Technology and Design;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.sutd.edu.sg;https://www.microsoft.com/en-us/research/group/cambridge", "aff_unique_abbr": "SUTD;MSR", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;United Kingdom" }, { "title": "Constructing Deep Neural Networks by Bayesian Network Structure Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11310", "id": "11310", "author_site": "Raanan Rohekar, Shami Nisimov, Yaniv Gurwicz, Guy Koren, Gal Novik", "author": "Raanan Y. Rohekar; Shami Nisimov; Yaniv Gurwicz; Guy Koren; Gal Novik", "abstract": "We introduce a principled approach for unsupervised structure learning of deep neural networks. We propose a new interpretation for depth and inter-layer connectivity where conditional independencies in the input distribution are encoded hierarchically in the network structure. Thus, the depth of the network is determined inherently. The proposed method casts the problem of neural network structure learning as a problem of Bayesian network structure learning. Then, instead of directly learning the discriminative structure, it learns a generative graph, constructs its stochastic inverse, and then constructs a discriminative graph. 
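The latent-space shaping mentioned at the end of the Constrained Graph Variational Autoencoders entry suggests a simple mechanism: score latent codes with a differentiable property surrogate and gradient-ascend a code before decoding. A hypothetical sketch (property_surrogate is an assumed scalar-valued torch module, not part of the paper's released code):

```python
import torch

def optimize_latent(property_surrogate, z0, steps=100, lr=0.05):
    """Gradient ascent on a property surrogate over the latent space;
    decode the returned code to obtain a (locally) optimized molecule."""
    z = z0.clone().requires_grad_(True)
    opt = torch.optim.Adam([z], lr=lr)
    for _ in range(steps):
        opt.zero_grad()
        loss = -property_surrogate(z).sum()   # negate to maximize
        loss.backward()
        opt.step()
    return z.detach()
```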
We prove that conditional-dependency relations among the latent variables in the generative graph are preserved in the class-conditional discriminative graph. We demonstrate on image classification benchmarks that the deepest layers (convolutional and dense) of common networks can be replaced by significantly smaller learned structures, while maintaining classification accuracy---state-of-the-art on tested benchmarks. Our structure learning algorithm requires a small computational cost and runs efficiently on a standard desktop CPU.", "bibtex": "@inproceedings{NEURIPS2018_95d309f0,\n author = {Rohekar, Raanan Y. and Nisimov, Shami and Gurwicz, Yaniv and Koren, Guy and Novik, Gal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constructing Deep Neural Networks by Bayesian Network Structure Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/95d309f0b035d97f69902e7972c2b2e6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/95d309f0b035d97f69902e7972c2b2e6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/95d309f0b035d97f69902e7972c2b2e6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/95d309f0b035d97f69902e7972c2b2e6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/95d309f0b035d97f69902e7972c2b2e6-Reviews.html", "metareview": "", "pdf_size": 604601, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11542504438944807459&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Intel AI Lab; Intel AI Lab; Intel AI Lab; Intel AI Lab; Intel AI Lab", "aff_domain": "intel.com;intel.com;intel.com;intel.com;intel.com", "email": "intel.com;intel.com;intel.com;intel.com;intel.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/95d309f0b035d97f69902e7972c2b2e6-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel AI Lab", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Constructing Fast Network through Deconstruction of Convolution", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11578", "id": "11578", "author_site": "Yunho Jeon, Junmo Kim", "author": "Yunho Jeon; Junmo Kim", "abstract": "Convolutional neural networks have achieved great success in various vision tasks; however, they incur heavy resource costs. By using deeper and wider networks, network accuracy can be improved rapidly. However, in an environment with limited resources (e.g., mobile applications), heavy networks may not be usable. This study shows that naive convolution can be deconstructed into a shift operation and pointwise convolution. To cope with various convolutions, we propose a new shift operation called active shift layer (ASL) that formulates the amount of shift as a learnable function with shift parameters. This new layer can be optimized end-to-end through backpropagation and it can provide optimal shift values. 
Finally, we apply this layer to a light and fast network that surpasses existing state-of-the-art networks.", "bibtex": "@inproceedings{NEURIPS2018_9719a00e,\n author = {Jeon, Yunho and Kim, Junmo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constructing Fast Network through Deconstruction of Convolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9719a00ed0c5709d80dfef33795dcef3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9719a00ed0c5709d80dfef33795dcef3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9719a00ed0c5709d80dfef33795dcef3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9719a00ed0c5709d80dfef33795dcef3-Reviews.html", "metareview": "", "pdf_size": 1120719, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15893085353567655931&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "School of Electrical Engineering, KAIST; School of Electrical Engineering, KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "email": "kaist.ac.kr;kaist.ac.kr", "github": "https://github.com/jyh2986/Active-Shift", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9719a00ed0c5709d80dfef33795dcef3-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "KAIST", "aff_unique_dep": "School of Electrical Engineering", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Constructing Unrestricted Adversarial Examples with Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11795", "id": "11795", "author_site": "Yang Song, Rui Shu, Nate Kushman, Stefano Ermon", "author": "Yang Song; Rui Shu; Nate Kushman; Stefano Ermon", "abstract": "Adversarial examples are typically constructed by perturbing an existing data point within a small matrix norm, and current defense methods are focused on guarding against this type of attack. In this paper, we propose a new class of adversarial examples that are synthesized entirely from scratch using a conditional generative model, without being restricted to norm-bounded perturbations. We first train an Auxiliary Classifier Generative Adversarial Network (AC-GAN) to model the class-conditional distribution over data samples. Then, conditioned on a desired class, we search over the AC-GAN latent space to find images that are likely under the generative model and are misclassified by a target classifier. We demonstrate through human evaluation that these new kind of adversarial images, which we call Generative Adversarial Examples, are legitimate and belong to the desired class. Our empirical results on the MNIST, SVHN, and CelebA datasets show that generative adversarial examples can bypass strong adversarial training and certified defense methods designed for traditional adversarial attacks.", "bibtex": "@inproceedings{NEURIPS2018_8cea559c,\n author = {Song, Yang and Shu, Rui and Kushman, Nate and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
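Back to the Constructing Fast Network entry: its deconstruction replaces a spatial convolution with learnable per-channel shifts followed by a pointwise (1x1) convolution. A self-contained PyTorch sketch of the shift half, using bilinear sampling so the shift parameters remain trainable end-to-end (parameterizing shifts in normalized coordinates is my simplification):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActiveShift(nn.Module):
    """Each channel is translated by its own learnable (dx, dy); bilinear
    sampling keeps the shift differentiable. A 1x1 conv would follow."""
    def __init__(self, channels):
        super().__init__()
        self.shift = nn.Parameter(torch.zeros(channels, 2))  # per-channel (dx, dy)

    def forward(self, x):
        b, c, h, w = x.shape
        ys, xs = torch.meshgrid(torch.linspace(-1, 1, h, device=x.device),
                                torch.linspace(-1, 1, w, device=x.device),
                                indexing="ij")
        grid = torch.stack([xs, ys], dim=-1)            # (h, w, 2) base grid
        grid = grid + self.shift.view(c, 1, 1, 2)       # offset per channel
        grid = grid.unsqueeze(0).expand(b, c, h, w, 2).reshape(b * c, h, w, 2)
        out = F.grid_sample(x.reshape(b * c, 1, h, w), grid, align_corners=True)
        return out.reshape(b, c, h, w)
```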
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constructing Unrestricted Adversarial Examples with Generative Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8cea559c47e4fbdb73b23e0223d04e79-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8cea559c47e4fbdb73b23e0223d04e79-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8cea559c47e4fbdb73b23e0223d04e79-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8cea559c47e4fbdb73b23e0223d04e79-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8cea559c47e4fbdb73b23e0223d04e79-Reviews.html", "metareview": "", "pdf_size": 2794761, "gs_citation": 350, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14086270849571978699&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University; Stanford University; Microsoft Research; Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu;microsoft.com;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu;microsoft.com;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8cea559c47e4fbdb73b23e0223d04e79-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Stanford;MSR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Contamination Attacks and Mitigation in Multi-Party Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11638", "id": "11638", "author_site": "Jamie Hayes, Olga Ohrimenko", "author": "Jamie Hayes; Olga Ohrimenko", "abstract": "Machine learning is data hungry; the more data a model has access to in training, the more likely it is to perform well at inference time. Distinct parties may want to combine their local data to gain the benefits of a model trained on a large corpus of data. We consider such a case: parties get access to the model trained on their joint data but do not see each other's individual datasets. We show that one needs to be careful when using this multi-party model since a potentially malicious party can taint the model by providing contaminated data. We then show how adversarial training can defend against such attacks by preventing the model from learning trends specific to individual parties' data, thereby also guaranteeing party-level membership privacy.", "bibtex": "@inproceedings{NEURIPS2018_331316d4,\n author = {Hayes, Jamie and Ohrimenko, Olga},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
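The latent-space search in the Constructing Unrestricted Adversarial Examples entry can be sketched as gradient descent over a class-conditional generator's input: find a code decoded as the source class yet classified as the target class, staying near the starting code so the sample remains likely under the generative model. Illustrative only; generator and classifier are assumed callables, not the authors' code:

```python
import torch
import torch.nn.functional as F

def generative_adversarial_example(generator, classifier, y_source, y_target,
                                   z0, steps=200, lr=0.01, lam=0.1):
    """Search for z such that generator(z, y_source) is (mis)classified as
    y_target; the quadratic term keeps z near its initialization z0."""
    z = z0.clone().requires_grad_(True)
    opt = torch.optim.Adam([z], lr=lr)
    for _ in range(steps):
        logits = classifier(generator(z, y_source))
        loss = F.cross_entropy(logits, y_target) + lam * (z - z0).pow(2).sum()
        opt.zero_grad()
        loss.backward()
        opt.step()
    return generator(z, y_source).detach()
```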
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contamination Attacks and Mitigation in Multi-Party Machine Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/331316d4efb44682092a006307b9ae3a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/331316d4efb44682092a006307b9ae3a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/331316d4efb44682092a006307b9ae3a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/331316d4efb44682092a006307b9ae3a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/331316d4efb44682092a006307b9ae3a-Reviews.html", "metareview": "", "pdf_size": 301164, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17130495386461135916&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "University College London; Microsoft Research", "aff_domain": "cs.ucl.ac.uk;microsoft.com", "email": "cs.ucl.ac.uk;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/331316d4efb44682092a006307b9ae3a-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University College London;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ucl.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UCL;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Content preserving text generation with attribute controls", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11499", "id": "11499", "author_site": "Lajanugen Logeswaran, Honglak Lee, Samy Bengio", "author": "Lajanugen Logeswaran; Honglak Lee; Samy Bengio", "abstract": "In this work, we address the problem of modifying textual attributes of sentences. Given an input sentence and a set of attribute labels, we attempt to generate sentences that are compatible with the conditioning information. To ensure that the model generates content compatible sentences, we introduce a reconstruction loss which interpolates between auto-encoding and back-translation loss components. We propose an adversarial loss to enforce generated samples to be attribute compatible and realistic. Through quantitative, qualitative and human evaluations we demonstrate that our model is capable of generating fluent sentences that better reflect the conditioning information compared to prior methods. We further demonstrate that the model is capable of simultaneously controlling multiple attributes.", "bibtex": "@inproceedings{NEURIPS2018_7cf64379,\n author = {Logeswaran, Lajanugen and Lee, Honglak and Bengio, Samy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Content preserving text generation with attribute controls},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7cf64379eb6f29a4d25c4b6a2df713e4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7cf64379eb6f29a4d25c4b6a2df713e4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7cf64379eb6f29a4d25c4b6a2df713e4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7cf64379eb6f29a4d25c4b6a2df713e4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7cf64379eb6f29a4d25c4b6a2df713e4-Reviews.html", "metareview": "", "pdf_size": 215130, "gs_citation": 142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5681872942809862968&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "University of Michigan; Google Brain; Google Brain", "aff_domain": "umich.edu;google.com;google.com", "email": "umich.edu;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7cf64379eb6f29a4d25c4b6a2df713e4-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Michigan;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.umich.edu;https://brain.google.com", "aff_unique_abbr": "UM;Google Brain", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Context-aware Synthesis and Placement of Object Instances", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11983", "id": "11983", "author_site": "Donghoon Lee, Sifei Liu, Jinwei Gu, Ming-Yu Liu, Ming-Hsuan Yang, Jan Kautz", "author": "Donghoon Lee; Sifei Liu; Jinwei Gu; Ming-Yu Liu; Ming-Hsuan Yang; Jan Kautz", "abstract": "Learning to insert an object instance into an image in a semantically coherent\nmanner is a challenging and interesting problem. Solving it requires (a) determining a location to place an object in the scene and (b) determining its appearance at the location. Such an object insertion model can potentially facilitate numerous image editing and scene parsing applications. In this paper, we propose an end-to-end trainable neural network for the task of inserting an object instance mask of a specified class into the semantic label map of an image. Our network consists of two generative modules where one determines where the inserted object mask should be (i.e., location and scale) and the other determines what the object mask shape (and pose) should look like. The two modules are connected together via a spatial transformation network and jointly trained. We devise a learning procedure that leverages both supervised and unsupervised data and show our model can insert an object at diverse locations with various appearances. We conduct extensive experimental validations with comparisons to strong baselines to verify the effectiveness of the proposed network.", "bibtex": "@inproceedings{NEURIPS2018_c6969ae3,\n author = {Lee, Donghoon and Liu, Sifei and Gu, Jinwei and Liu, Ming-Yu and Yang, Ming-Hsuan and Kautz, Jan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Context-aware Synthesis and Placement of Object Instances},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c6969ae30d99f73951cb976b88a457af-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c6969ae30d99f73951cb976b88a457af-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c6969ae30d99f73951cb976b88a457af-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c6969ae30d99f73951cb976b88a457af-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c6969ae30d99f73951cb976b88a457af-Reviews.html", "metareview": "", "pdf_size": 4141116, "gs_citation": 139, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16175327312247199712&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Seoul National University+Google Cloud AI; NVIDIA; NVIDIA; NVIDIA; Google Cloud AI+University of California at Merced; NVIDIA", "aff_domain": "rllab.snu.ac.kr;nvidia.com;nvidia.com;nvidia.com;ucmerced.edu;nvidia.com", "email": "rllab.snu.ac.kr;nvidia.com;nvidia.com;nvidia.com;ucmerced.edu;nvidia.com", "github": "https://github.com/NVlabs/Instance_Insertion", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c6969ae30d99f73951cb976b88a457af-Abstract.html", "aff_unique_index": "0+1;2;2;2;1+3;2", "aff_unique_norm": "Seoul National University;Google;NVIDIA;University of California, Merced", "aff_unique_dep": ";Google Cloud AI;NVIDIA Corporation;", "aff_unique_url": "https://www.snu.ac.kr;https://cloud.google.com/ai;https://www.nvidia.com;https://www.ucmerced.edu", "aff_unique_abbr": "SNU;Google Cloud AI;NVIDIA;UC Merced", "aff_campus_unique_index": "1;1+2", "aff_campus_unique": ";Mountain View;Merced", "aff_country_unique_index": "0+1;1;1;1;1+1;1", "aff_country_unique": "South Korea;United States" }, { "title": "Context-dependent upper-confidence bounds for directed exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11469", "id": "11469", "author_site": "Raksha Kumaraswamy, Matthew Schlegel, Adam White, Martha White", "author": "Raksha Kumaraswamy; Matthew Schlegel; Adam White; Martha White", "abstract": "Directed exploration strategies for reinforcement learning are critical for learning an optimal policy in a minimal number of interactions with the environment. Many algorithms use optimism to direct exploration, either through visitation estimates or upper confidence bounds, as opposed to data-inefficient strategies like e-greedy that use random, undirected exploration. Most data-efficient exploration methods require significant computation, typically relying on a learned model to guide exploration. Least-squares methods have the potential to provide some of the data-efficiency benefits of model-based approaches\u2014because they summarize past interactions\u2014with the computation closer to that of model-free approaches. In this work, we provide a novel, computationally efficient, incremental exploration strategy, leveraging this property of least-squares temporal difference learning (LSTD). We derive upper confidence bounds on the action-values learned by LSTD, with context-dependent (or state-dependent) noise variance. Such context-dependent noise focuses exploration on a subset of variable states, and allows for reduced exploration in other states. 
We empirically demonstrate that our algorithm can converge more quickly than other incremental exploration strategies using confidence estimates on action-values.", "bibtex": "@inproceedings{NEURIPS2018_f516dfb8,\n author = {Kumaraswamy, Raksha and Schlegel, Matthew and White, Adam and White, Martha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Context-dependent upper-confidence bounds for directed exploration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f516dfb84b9051ed85b89cdc3a8ab7f5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f516dfb84b9051ed85b89cdc3a8ab7f5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f516dfb84b9051ed85b89cdc3a8ab7f5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f516dfb84b9051ed85b89cdc3a8ab7f5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f516dfb84b9051ed85b89cdc3a8ab7f5-Reviews.html", "metareview": "", "pdf_size": 1349831, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12106286640916203939&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computing Science, University of Alberta; Department of Computing Science, University of Alberta; Department of Computing Science, University of Alberta + DeepMind; Department of Computing Science, University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;google.com;ualberta.ca", "email": "ualberta.ca;ualberta.ca;google.com;ualberta.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f516dfb84b9051ed85b89cdc3a8ab7f5-Abstract.html", "aff_unique_index": "0;0;0+1;0", "aff_unique_norm": "University of Alberta;DeepMind", "aff_unique_dep": "Department of Computing Science;", "aff_unique_url": "https://www.ualberta.ca;https://deepmind.com", "aff_unique_abbr": "UAlberta;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1;0", "aff_country_unique": "Canada;United Kingdom" }, { "title": "Contextual Combinatorial Multi-armed Bandits with Volatile Arms and Submodular Reward", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11328", "id": "11328", "author_site": "Lixing Chen, Jie Xu, Zhuo Lu", "author": "Lixing Chen; Jie Xu; Zhuo Lu", "abstract": "In this paper, we study the stochastic contextual combinatorial multi-armed bandit (CC-MAB) framework that is tailored for volatile arms and submodular reward functions. CC-MAB inherits properties from both contextual bandit and combinatorial bandit: it aims to select a set of arms in each round based on the side information (a.k.a. context) associated with the arms. By ``volatile arms'', we mean that the available arms to select from in each round may change; and by ``submodular rewards'', we mean that the total reward achieved by selected arms is not a simple sum of individual rewards but demonstrates a feature of diminishing returns determined by the relations between selected arms (e.g. relevance and redundancy). Volatile arms and submodular rewards are often seen in many real-world applications, e.g. recommender systems and crowdsourcing, in which multi-armed bandit (MAB) based strategies are extensively applied. 
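The action rule in the context-dependent UCB entry above is the usual optimism pattern: an LSTD action-value estimate plus a confidence width that depends on the current features. A schematic NumPy version (names are mine; theta and A_inv stand for the LSTD weights and the inverse of its accumulated feature matrix):

```python
import numpy as np

def ucb_action(phi_actions, theta, A_inv, beta=1.0):
    """phi_actions: (n_actions, d) features for each candidate action.
    Returns the index maximizing value + beta * sqrt(phi' A_inv phi)."""
    values = phi_actions @ theta
    widths = np.sqrt(np.einsum("ad,de,ae->a", phi_actions, A_inv, phi_actions))
    return int(np.argmax(values + beta * widths))
```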
Although there exist works that investigate these issues separately based on standard MAB, jointly considering all these issues in a single MAB problem requires very different algorithm design and regret analysis. Our algorithm CC-MAB provides an online decision-making policy in a contextual and combinatorial bandit setting and effectively addresses the issues raised by volatile arms and submodular reward functions. The proposed algorithm is proved to achieve $O(cT^{\\frac{2\\alpha+D}{3\\alpha + D}}\\log(T))$ regret after a span of $T$ rounds. The performance of CC-MAB is evaluated by experiments conducted on a real-world crowdsourcing dataset, and the result shows that our algorithm outperforms the prior art.", "bibtex": "@inproceedings{NEURIPS2018_207f8801,\n author = {Chen, Lixing and Xu, Jie and Lu, Zhuo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contextual Combinatorial Multi-armed Bandits with Volatile Arms and Submodular Reward},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/207f88018f72237565570f8a9e5ca240-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/207f88018f72237565570f8a9e5ca240-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/207f88018f72237565570f8a9e5ca240-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/207f88018f72237565570f8a9e5ca240-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/207f88018f72237565570f8a9e5ca240-Reviews.html", "metareview": "", "pdf_size": 247731, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8416297762217274136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Electrical and Computer Engineering, University of Miami; Department of Electrical and Computer Engineering, University of Miami; Department of Electrical Engineering, University of South Florida", "aff_domain": "miami.edu;miami.edu;usf.edu", "email": "miami.edu;miami.edu;usf.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/207f88018f72237565570f8a9e5ca240-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Miami;University of South Florida", "aff_unique_dep": "Department of Electrical and Computer Engineering;Department of Electrical Engineering", "aff_unique_url": "https://www.miami.edu;https://www.usf.edu", "aff_unique_abbr": "UM;USF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contextual Pricing for Lipschitz Buyers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11550", "id": "11550", "author_site": "Jieming Mao, Renato Leme, Jon Schneider", "author": "Jieming Mao; Renato Leme; Jon Schneider", "abstract": "We investigate the problem of learning a Lipschitz function from binary\n feedback. In this problem, a learner is trying to learn a Lipschitz function\n $f:[0,1]^d \\rightarrow [0,1]$ over the course of $T$ rounds. On round $t$, an\n adversary provides the learner with an input $x_t$, the learner submits a\n guess $y_t$ for $f(x_t)$, and learns whether $y_t > f(x_t)$ or $y_t \\leq\n f(x_t)$. 
The learner's goal is to minimize their total loss $\\sum_t\\ell(f(x_t),\n y_t)$ (for some loss function $\\ell$). The problem is motivated by \\textit{contextual dynamic pricing},\n where a firm must sell a stream of differentiated products to a collection of\n buyers with non-linear valuations for the items and observes only whether the\n item was sold or not at the posted price.\n\n For the symmetric loss $\\ell(f(x_t), y_t) = \\vert f(x_t) - y_t \\vert$, we\n provide an algorithm for this problem achieving total loss $O(\\log T)$\n when $d=1$ and $O(T^{(d-1)/d})$ when $d>1$, and show that both bounds are\n tight (up to a factor of $\\sqrt{\\log T}$). For the pricing loss function\n $\\ell(f(x_t), y_t) = f(x_t) - y_t {\\bf 1}\\{y_t \\leq f(x_t)\\}$ we show a regret\n bound of $O(T^{d/(d+1)})$ and show that this bound is tight. We present\n improved bounds in the special case of a population of linear buyers.", "bibtex": "@inproceedings{NEURIPS2018_403ea2e8,\n author = {Mao, Jieming and Leme, Renato and Schneider, Jon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contextual Pricing for Lipschitz Buyers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/403ea2e851b9ab04a996beab4a480a30-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/403ea2e851b9ab04a996beab4a480a30-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/403ea2e851b9ab04a996beab4a480a30-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/403ea2e851b9ab04a996beab4a480a30-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/403ea2e851b9ab04a996beab4a480a30-Reviews.html", "metareview": "", "pdf_size": 247511, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3369384052718795600&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Pennsylvania; Google Research; Google Research", "aff_domain": "seas.upenn.edu;google.com;google.com", "email": "seas.upenn.edu;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/403ea2e851b9ab04a996beab4a480a30-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Pennsylvania;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.upenn.edu;https://research.google", "aff_unique_abbr": "UPenn;Google Research", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contextual Stochastic Block Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11820", "id": "11820", "author_site": "Yash Deshpande, Subhabrata Sen, Andrea Montanari, Elchanan Mossel", "author": "Yash Deshpande; Subhabrata Sen; Andrea Montanari; Elchanan Mossel", "abstract": "We provide the first information-theoretically tight analysis for inference of latent community structure given a sparse graph along with high-dimensional node covariates, correlated with the same latent communities. 
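The binary-feedback primitive behind the Contextual Pricing for Lipschitz Buyers entry is easiest to see with the context stripped away: keep an interval known to contain the unknown value, guess its midpoint, and let each yes/no answer halve the interval. The paper's algorithms run a Lipschitz-aware version of this over the context space; the bare primitive looks like:

```python
def midpoint_guessing(f_value, T):
    """Guess-and-halve under binary feedback: the error halves each round,
    so the cumulative symmetric loss sum_t |f - y_t| stays bounded."""
    lo, hi, total = 0.0, 1.0, 0.0
    for _ in range(T):
        y = (lo + hi) / 2
        total += abs(f_value - y)
        if y <= f_value:
            lo = y    # feedback: y_t <= f(x_t)
        else:
            hi = y    # feedback: y_t > f(x_t)
    return total
```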
Our work bridges recent theoretical breakthroughs in detection of latent community structure without node covariates and a large body of empirical work using diverse heuristics for combining node covariates with graphs for inference. The tightness of our analysis implies, in particular, the information-theoretic necessity of combining the different sources of information. \nOur analysis holds for networks of large degrees as well as for a Gaussian version of the model.", "bibtex": "@inproceedings{NEURIPS2018_08fc80de,\n author = {Deshpande, Yash and Sen, Subhabrata and Montanari, Andrea and Mossel, Elchanan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contextual Stochastic Block Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08fc80de8121419136e443a70489c123-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08fc80de8121419136e443a70489c123-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/08fc80de8121419136e443a70489c123-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08fc80de8121419136e443a70489c123-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08fc80de8121419136e443a70489c123-Reviews.html", "metareview": "", "pdf_size": 344229, "gs_citation": 204, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14581459314001286057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08fc80de8121419136e443a70489c123-Abstract.html" }, { "title": "Contextual bandits with surrogate losses: Margin bounds and efficient algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11270", "id": "11270", "author_site": "Dylan Foster, Akshay Krishnamurthy", "author": "Dylan J Foster; Akshay Krishnamurthy", "abstract": "We use surrogate losses to obtain several new regret bounds and new algorithms for contextual bandit learning. Using the ramp loss, we derive a new margin-based regret bound in terms of standard sequential complexity measures of a benchmark class of real-valued regression functions. Using the hinge loss, we derive an efficient algorithm with a $\\sqrt{dT}$-type mistake bound against benchmark policies induced by $d$-dimensional regressors. Under realizability assumptions, our results also yield classical regret bounds.", "bibtex": "@inproceedings{NEURIPS2018_01e9565c,\n author = {Foster, Dylan J and Krishnamurthy, Akshay},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contextual bandits with surrogate losses: Margin bounds and efficient algorithms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/01e9565cecc4e989123f9620c1d09c09-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/01e9565cecc4e989123f9620c1d09c09-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/01e9565cecc4e989123f9620c1d09c09-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/01e9565cecc4e989123f9620c1d09c09-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/01e9565cecc4e989123f9620c1d09c09-Reviews.html", "metareview": "", "pdf_size": 1535806, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7341390659698672715&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Cornell University; Microsoft Research, NYC", "aff_domain": "cs.cornell.edu;cs.umass.edu", "email": "cs.cornell.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/01e9565cecc4e989123f9620c1d09c09-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.cornell.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Cornell;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York City", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Continuous-time Value Function Approximation in Reproducing Kernel Hilbert Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11288", "id": "11288", "author_site": "Motoya Ohnishi, Masahiro Yukawa, Mikael Johansson, Masashi Sugiyama", "author": "Motoya Ohnishi; Masahiro Yukawa; Mikael Johansson; Masashi Sugiyama", "abstract": "Motivated by the success of reinforcement learning (RL) for discrete-time tasks such as AlphaGo and Atari games, there has been a recent surge of interest in using RL for continuous-time control of physical systems (cf. many challenging tasks in OpenAI Gym and DeepMind Control Suite).\nSince discretization of time is susceptible to error, it is methodologically more desirable to handle the system dynamics directly in continuous time.\nHowever, very few techniques exist for continuous-time RL and they lack flexibility in value function approximation.\nIn this paper, we propose a novel framework for model-based continuous-time value function approximation in reproducing kernel Hilbert spaces.\nThe resulting framework is so flexible that it can accommodate any kind of kernel-based approach, such as Gaussian processes and kernel adaptive filters, and it allows us to handle uncertainties and nonstationarity without prior knowledge about the environment or what basis functions to employ.\nWe demonstrate the validity of the presented framework through experiments.", "bibtex": "@inproceedings{NEURIPS2018_729c6888,\n author = {Ohnishi, Motoya and Yukawa, Masahiro and Johansson, Mikael and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Continuous-time Value Function Approximation in Reproducing Kernel Hilbert Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/729c68884bd359ade15d5f163166738a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/729c68884bd359ade15d5f163166738a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/729c68884bd359ade15d5f163166738a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/729c68884bd359ade15d5f163166738a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/729c68884bd359ade15d5f163166738a-Reviews.html", "metareview": "", "pdf_size": 632762, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6372007531339185222&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Keio Univ. + KTH + RIKEN; Keio Univ. + RIKEN; KTH; RIKEN + Univ. Tokyo", "aff_domain": "riken.jp;elec.keio.ac.jp;ee.kth.se;riken.jp", "email": "riken.jp;elec.keio.ac.jp;ee.kth.se;riken.jp", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/729c68884bd359ade15d5f163166738a-Abstract.html", "aff_unique_index": "0+1+2;0+2;1;2+3", "aff_unique_norm": "Keio University;KTH Royal Institute of Technology;RIKEN;University of Tokyo", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.keio.ac.jp;https://www.kth.se;https://www.riken.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "Keio;KTH;RIKEN;UTokyo", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+1+0;0+0;1;0+0", "aff_country_unique": "Japan;Sweden" }, { "title": "Contour location via entropy reduction leveraging multiple information sources", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11510", "id": "11510", "author_site": "Alexandre Marques, Remi Lam, Karen Willcox", "author": "Alexandre Marques; Remi Lam; Karen Willcox", "abstract": "We introduce an algorithm to locate contours of functions that are expensive to evaluate. The problem of locating contours arises in many applications, including classification, constrained optimization, and performance analysis of mechanical and dynamical systems (reliability, probability of failure, stability, etc.). Our algorithm locates contours using information from multiple sources, which are available in the form of relatively inexpensive, biased, and possibly noisy\n approximations to the original function. Considering multiple information sources can lead to significant cost savings. We also introduce the concept of contour entropy, a formal measure of uncertainty about the location of the zero contour of a function approximated by a statistical surrogate model. Our algorithm locates contours efficiently by maximizing the reduction of contour entropy per unit cost.", "bibtex": "@inproceedings{NEURIPS2018_01a06836,\n author = {Marques, Alexandre and Lam, Remi and Willcox, Karen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contour location via entropy reduction leveraging multiple information sources},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/01a0683665f38d8e5e567b3b15ca98bf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/01a0683665f38d8e5e567b3b15ca98bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/01a0683665f38d8e5e567b3b15ca98bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/01a0683665f38d8e5e567b3b15ca98bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/01a0683665f38d8e5e567b3b15ca98bf-Reviews.html", "metareview": "", "pdf_size": 1002234, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7471806439949353531&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Aeronautics and Astronautics, Massachusetts Institute of Technology; Center for Computational Engineering, Massachusetts Institute of Technology; Institute for Computational Engineering and Sciences, University of Texas at Austin", "aff_domain": "mit.edu;mit.edu;ices.utexas.edu", "email": "mit.edu;mit.edu;ices.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/01a0683665f38d8e5e567b3b15ca98bf-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of Texas at Austin", "aff_unique_dep": "Department of Aeronautics and Astronautics;Institute for Computational Engineering and Sciences", "aff_unique_url": "https://web.mit.edu;https://wwwICES.utexas.edu", "aff_unique_abbr": "MIT;UT Austin", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Contrastive Learning from Pairwise Measurements", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12031", "id": "12031", "author_site": "Yi Chen, Zhuoran Yang, Yuchen Xie, Zhaoran Wang", "author": "Yi Chen; Zhuoran Yang; Yuchen Xie; Zhaoran Wang", "abstract": "Learning from pairwise measurements naturally arises from many applications, such as rank aggregation, ordinal embedding, and crowdsourcing. However, most existing models and algorithms are susceptible to potential model misspecification. In this paper, we study a semiparametric model where the pairwise measurements follow a natural exponential family distribution with an unknown base measure. Such a semiparametric model includes various popular parametric models, such as the Bradley-Terry-Luce model and the paired cardinal model, as special cases. To estimate this semiparametric model without specifying the base measure, we propose a data augmentation technique to create virtual examples, which enables us to define a contrastive estimator. In particular, we prove that such a contrastive estimator is invariant to model misspecification within the natural exponential family, and moreover, attains the optimal statistical rate of convergence up to a logarithmic factor. We provide numerical experiments to corroborate our theory.", "bibtex": "@inproceedings{NEURIPS2018_6bf733bb,\n author = {Chen, Yi and Yang, Zhuoran and Xie, Yuchen and Wang, Zhaoran},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contrastive Learning from Pairwise Measurements},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6bf733bb7f81e866306e9b5f012419cb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6bf733bb7f81e866306e9b5f012419cb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6bf733bb7f81e866306e9b5f012419cb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6bf733bb7f81e866306e9b5f012419cb-Reviews.html", "metareview": "", "pdf_size": 353944, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16687614572730830798&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Northwestern University; Princeton University; Northwestern University; Northwestern University", "aff_domain": "u.northwestern.edu;u.northwestern.edu;princeton.edu;northwestern.edu", "email": "u.northwestern.edu;u.northwestern.edu;princeton.edu;northwestern.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6bf733bb7f81e866306e9b5f012419cb-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Northwestern University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.princeton.edu", "aff_unique_abbr": "NU;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Convergence of Cubic Regularization for Nonconvex Optimization under KL Property", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11375", "id": "11375", "author_site": "Yi Zhou, Zhe Wang, Yingbin Liang", "author": "Yi Zhou; Zhe Wang; Yingbin Liang", "abstract": "Cubic-regularized Newton's method (CR) is a popular algorithm that is guaranteed to produce a second-order stationary solution for solving nonconvex optimization problems. However, the existing understanding of the convergence rate of CR is conditioned on special types of geometrical properties of the objective function. In this paper, we explore the asymptotic convergence rate of CR by exploiting the ubiquitous Kurdyka-Lojasiewicz (KL) property of the nonconvex objective functions. Specifically, we characterize the asymptotic convergence rate of various types of optimality measures for CR including function value gap, variable distance gap, gradient norm and least eigenvalue of the Hessian matrix. Our results fully characterize the diverse convergence behaviors of these optimality measures in the full parameter regime of the KL property. Moreover, we show that the obtained asymptotic convergence rates of CR are order-wise faster than those of first-order gradient descent algorithms under the KL property.", "bibtex": "@inproceedings{NEURIPS2018_b4568df2,\n author = {Zhou, Yi and Wang, Zhe and Liang, Yingbin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convergence of Cubic Regularization for Nonconvex Optimization under KL Property},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b4568df26077653eeadf29596708c94b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b4568df26077653eeadf29596708c94b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b4568df26077653eeadf29596708c94b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b4568df26077653eeadf29596708c94b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b4568df26077653eeadf29596708c94b-Reviews.html", "metareview": "", "pdf_size": 317727, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1021202422150508681&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of ECE, The Ohio State University; Department of ECE, The Ohio State University; Department of ECE, The Ohio State University", "aff_domain": "osu.edu;osu.edu;osu.edu", "email": "osu.edu;osu.edu;osu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b4568df26077653eeadf29596708c94b-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Convex Elicitation of Continuous Properties", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11984", "id": "11984", "author_site": "Jessica Finocchiaro, Rafael Frongillo", "author": "Jessica Finocchiaro; Rafael Frongillo", "abstract": "A property or statistic of a distribution is said to be elicitable if it can be expressed as the minimizer of some loss function in expectation. Recent work shows that continuous real-valued properties are elicitable if and only if they are identifiable, meaning the set of distributions with the same property value can be described by linear constraints. From a practical standpoint, one may ask for which such properties there exist convex loss functions. In this paper, in a finite-outcome setting, we show that in fact every elicitable real-valued property can be elicited by a convex loss function. Our proof is constructive, and leads to convex loss functions for new properties.", "bibtex": "@inproceedings{NEURIPS2018_e9510081,\n author = {Finocchiaro, Jessica and Frongillo, Rafael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convex Elicitation of Continuous Properties},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e9510081ac30ffa83f10b68cde1cac07-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e9510081ac30ffa83f10b68cde1cac07-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e9510081ac30ffa83f10b68cde1cac07-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e9510081ac30ffa83f10b68cde1cac07-Reviews.html", "metareview": "", "pdf_size": 577108, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17921411150945091279&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, University of Colorado, Boulder; Department of Computer Science, University of Colorado, Boulder", "aff_domain": "colorado.edu;colorado.edu", "email": "colorado.edu;colorado.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e9510081ac30ffa83f10b68cde1cac07-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Colorado", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.colorado.edu", "aff_unique_abbr": "CU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Boulder", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Cooperative Holistic Scene Understanding: Unifying 3D Object, Layout, and Camera Pose Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11047", "id": "11047", "author_site": "Siyuan Huang, Siyuan Qi, Yinxue Xiao, Yixin Zhu, Ying Nian Wu, Song-Chun Zhu", "author": "Siyuan Huang; Siyuan Qi; Yinxue Xiao; Yixin Zhu; Ying Nian Wu; Song-Chun Zhu", "abstract": "Holistic 3D indoor scene understanding refers to jointly recovering the i) object bounding boxes, ii) room layout, and iii) camera pose, all in 3D. The existing methods either are ineffective or only tackle the problem partially. In this paper, we propose an end-to-end model that simultaneously solves all three tasks in real-time given only a single RGB image. The essence of the proposed method is to improve the prediction by i) parametrizing the targets (e.g., 3D boxes) instead of directly estimating the targets, and ii) cooperative training across different modules in contrast to training these modules individually. Specifically, we parametrize the 3D object bounding boxes by the predictions from several modules, i.e., 3D camera pose and object attributes. The proposed method provides two major advantages: i) The parametrization helps maintain the consistency between the 2D image and the 3D world, thus largely reducing the prediction variances in 3D coordinates. ii) Constraints can be imposed on the parametrization to train different modules simultaneously. We call these constraints \"cooperative losses\" as they enable the joint training and inference. We employ three cooperative losses for 3D bounding boxes, 2D projections, and physical constraints to estimate a geometrically consistent and physically plausible 3D scene. 
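The cooperative-loss idea described above lends itself to a compact illustration: derive the 3D box corners from the predicted parametrization, reproject them through the camera, and penalize disagreement with the observed 2D detection. The numpy sketch below is ours, with hypothetical function names and box conventions; it shows the flavour of a projection-consistency term, not the paper's exact losses.

```python
import numpy as np

def box_corners_3d(center, size, yaw):
    """Eight corners of a 3D box from its center (3,), size (l, w, h), and yaw."""
    l, w, h = size
    x = np.array([1, 1, -1, -1, 1, 1, -1, -1]) * l / 2
    y = np.array([1, -1, -1, 1, 1, -1, -1, 1]) * w / 2
    z = np.array([1, 1, 1, 1, -1, -1, -1, -1]) * h / 2
    R = np.array([[np.cos(yaw), -np.sin(yaw), 0],
                  [np.sin(yaw),  np.cos(yaw), 0],
                  [0,            0,           1]])
    return (R @ np.stack([x, y, z])).T + center          # (8, 3) world coordinates

def project_to_2d_box(corners, K, R_cam, t_cam):
    """Project 3D corners with intrinsics K and camera pose; return enclosing 2D box."""
    cam = R_cam @ corners.T + t_cam[:, None]             # world -> camera frame
    proj = K @ cam
    uv = proj[:2] / proj[2]                              # perspective division
    return np.array([uv[0].min(), uv[1].min(), uv[0].max(), uv[1].max()])

def projection_consistency_loss(pred_box, camera, detected_2d_box):
    """L1 penalty tying the predicted 3D box to the observed 2D detection."""
    corners = box_corners_3d(*pred_box)
    return np.abs(project_to_2d_box(corners, *camera) - detected_2d_box).mean()
```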
Experiments on the SUN RGB-D dataset show that the proposed method significantly outperforms prior approaches on 3D layout estimation, 3D object detection, 3D camera pose estimation, and holistic scene understanding.", "bibtex": "@inproceedings{NEURIPS2018_82161242,\n author = {Huang, Siyuan and Qi, Siyuan and Xiao, Yinxue and Zhu, Yixin and Wu, Ying Nian and Zhu, Song-Chun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cooperative Holistic Scene Understanding: Unifying 3D Object, Layout, and Camera Pose Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/82161242827b703e6acf9c726942a1e4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/82161242827b703e6acf9c726942a1e4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/82161242827b703e6acf9c726942a1e4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/82161242827b703e6acf9c726942a1e4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/82161242827b703e6acf9c726942a1e4-Reviews.html", "metareview": "", "pdf_size": 3431518, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5227625249975009897&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Dept. of Statistics, UCLA+Dept. of Computer Science, UCLA; Dept. of Computer Science, UCLA; Dept. of Computer Science, UCLA; Dept. of Statistics, UCLA; Dept. of Statistics, UCLA; Dept. of Statistics, UCLA+Dept. of Computer Science, UCLA", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu;stat.ucla.edu;stat.ucla.edu", "email": "ucla.edu;cs.ucla.edu;ucla.edu;ucla.edu;stat.ucla.edu;stat.ucla.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/82161242827b703e6acf9c726942a1e4-Abstract.html", "aff_unique_index": "0+0;0;0;0;0;0+0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0+0;0;0;0;0;0+0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0+0;0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Cooperative Learning of Audio and Video Models from Self-Supervised Synchronization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11745", "id": "11745", "author_site": "Bruno Korbar, Du Tran, Lorenzo Torresani", "author": "Bruno Korbar; Du Tran; Lorenzo Torresani", "abstract": "There is a natural correlation between the visual and auditory elements of a video. In this work we leverage this connection to learn general and effective models for both audio and video analysis from self-supervised temporal synchronization. We demonstrate that a calibrated curriculum learning scheme, a careful choice of negative examples, and the use of a contrastive loss are critical ingredients to obtain powerful multi-sensory representations from models optimized to discern temporal synchronization of audio-video pairs. Without further fine-tuning, the resulting audio features achieve performance superior or comparable to the state-of-the-art on established audio classification benchmarks (DCASE2014 and ESC-50). 
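The contrastive objective mentioned in this abstract can be sketched in a few lines. Below is a generic Hadsell-style contrastive loss over audio/video embedding pairs; the embedding networks, the margin value, and the curriculum over negatives are assumptions of ours rather than the paper's exact recipe.

```python
import numpy as np

def contrastive_sync_loss(video_emb, audio_emb, in_sync, margin=1.0):
    """Pull embeddings of in-sync audio/video pairs together; push
    out-of-sync pairs at least `margin` apart (labels in {0, 1})."""
    d = np.linalg.norm(video_emb - audio_emb, axis=1)        # pairwise distances
    pos = in_sync * d**2                                     # synced pairs
    neg = (1 - in_sync) * np.maximum(margin - d, 0.0)**2     # out-of-sync pairs
    return (pos + neg).mean()
```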
At the same time, our visual subnet provides a very effective initialization to improve the accuracy of video-based action recognition models: compared to learning from scratch, our self-supervised pretraining yields a remarkable gain of +19.9% in action recognition accuracy on UCF101 and a boost of +17.7% on HMDB51.", "bibtex": "@inproceedings{NEURIPS2018_c4616f5a,\n author = {Korbar, Bruno and Tran, Du and Torresani, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cooperative Learning of Audio and Video Models from Self-Supervised Synchronization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c4616f5a24a66668f11ca4fa80525dc4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c4616f5a24a66668f11ca4fa80525dc4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c4616f5a24a66668f11ca4fa80525dc4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c4616f5a24a66668f11ca4fa80525dc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c4616f5a24a66668f11ca4fa80525dc4-Reviews.html", "metareview": "", "pdf_size": 4147500, "gs_citation": 570, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3895941560673463494&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Dartmouth College; Facebook Research; Dartmouth College", "aff_domain": "dartmouth.edu;fb.com;dartmouth.edu", "email": "dartmouth.edu;fb.com;dartmouth.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c4616f5a24a66668f11ca4fa80525dc4-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Dartmouth College;Meta", "aff_unique_dep": ";Facebook Research", "aff_unique_url": "https://www.dartmouth.edu;https://research.facebook.com", "aff_unique_abbr": "Dartmouth;FB Research", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Cooperative neural networks (CoNN): Exploiting prior independence structure for improved classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11409", "id": "11409", "author_site": "Harsh Shrivastava, Eugene Bart, Bob Price, Hanjun Dai, Bo Dai, Srinivas Aluru", "author": "Harsh Shrivastava; Eugene Bart; Bob Price; Hanjun Dai; Bo Dai; Srinivas Aluru", "abstract": "We propose a new approach, called cooperative neural networks (CoNN), which uses a set of cooperatively trained neural networks to capture latent representations that exploit prior given independence structure. The model is more flexible than traditional graphical models based on exponential family distributions, but incorporates more domain-specific prior structure than traditional deep networks or variational autoencoders. The framework is very general and can be used to exploit the independence structure of any graphical model. We illustrate the technique by showing that we can transfer the independence structure of the popular Latent Dirichlet Allocation (LDA) model to a cooperative neural network, CoNN-sLDA. 
Empirical evaluation of CoNN-sLDA on supervised text classification tasks demonstrates that the theoretical advantages of prior independence structure can be realized in practice: we demonstrate a 23 percent reduction in error on the challenging MultiSent data set compared to state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_05192834,\n author = {Shrivastava, Harsh and Bart, Eugene and Price, Bob and Dai, Hanjun and Dai, Bo and Aluru, Srinivas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cooperative neural networks (CoNN): Exploiting prior independence structure for improved classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/051928341be67dcba03f0e04104d9047-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/051928341be67dcba03f0e04104d9047-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/051928341be67dcba03f0e04104d9047-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/051928341be67dcba03f0e04104d9047-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/051928341be67dcba03f0e04104d9047-Reviews.html", "metareview": "", "pdf_size": 959462, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10862454724126678270&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Georgia Tech; PARC; PARC; Georgia Tech; Georgia Tech; Georgia Tech", "aff_domain": "gatech.edu;parc.com;parc.com;gatech.edu;gatech.edu;cc.gatech.edu", "email": "gatech.edu;parc.com;parc.com;gatech.edu;gatech.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/051928341be67dcba03f0e04104d9047-Abstract.html", "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "Georgia Institute of Technology;Palo Alto Research Center", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.parc.com", "aff_unique_abbr": "Georgia Tech;PARC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Palo Alto", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Coordinate Descent with Bandit Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11881", "id": "11881", "author_site": "Farnood Salehi, Patrick Thiran, Elisa Celis", "author": "Farnood Salehi; Patrick Thiran; Elisa Celis", "abstract": "Coordinate descent methods minimize a cost function by updating a single decision variable (corresponding to one coordinate) at a time. Ideally, we would update the decision variable that yields the largest marginal decrease in the cost function. However, finding this coordinate would require checking all of them, which is not computationally practical. Therefore, we propose a new adaptive method for coordinate descent. First, we define a lower bound on the decrease of the cost function when a coordinate is updated and, instead of calculating this lower bound for all coordinates, we use a multi-armed bandit algorithm to learn which coordinates result in the largest marginal decrease and simultaneously perform coordinate descent. 
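The adaptive scheme just described is easy to sketch: treat each coordinate as a bandit arm and use an estimate of its marginal decrease as the reward. The epsilon-greedy rule and the |partial derivative| reward proxy below are our simplifications; the paper's lower bound and bandit algorithm are more refined.

```python
import numpy as np

def bandit_coordinate_descent(partial_grad, x0, lr=0.1, eps=0.2, steps=1000, seed=0):
    """Coordinate descent where an epsilon-greedy bandit picks which
    coordinate to update next, learning which updates help most."""
    rng = np.random.default_rng(seed)
    x = np.asarray(x0, dtype=float).copy()
    reward = np.full(x.shape, np.inf)          # optimistic: try each coordinate once
    for _ in range(steps):
        if rng.random() < eps:
            i = int(rng.integers(len(x)))      # explore a random coordinate
        else:
            i = int(np.argmax(reward))         # exploit the most promising one
        g = partial_grad(x, i)                 # one partial derivative, cheap
        x[i] -= lr * g
        est = abs(g)                           # proxy for the marginal decrease
        reward[i] = est if np.isinf(reward[i]) else 0.9 * reward[i] + 0.1 * est
    return x
```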
We show that our approach improves the convergence of the coordinate methods both theoretically and experimentally.", "bibtex": "@inproceedings{NEURIPS2018_36f4d832,\n author = {Salehi, Farnood and Thiran, Patrick and Celis, Elisa},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coordinate Descent with Bandit Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/36f4d832825380f102846560a5104c90-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/36f4d832825380f102846560a5104c90-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/36f4d832825380f102846560a5104c90-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/36f4d832825380f102846560a5104c90-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/36f4d832825380f102846560a5104c90-Reviews.html", "metareview": "", "pdf_size": 1492500, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13766190813544832476&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "School of Computer and Communication Sciences, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL); School of Computer and Communication Sciences, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL); School of Computer and Communication Sciences, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL)", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/36f4d832825380f102846560a5104c90-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "School of Computer and Communication Sciences", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Coupled Variational Bayes via Optimization Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11921", "id": "11921", "author_site": "Bo Dai, Hanjun Dai, Niao He, Weiyang Liu, Zhen Liu, Jianshu Chen, Lin Xiao, Le Song", "author": "Bo Dai; Hanjun Dai; Niao He; Weiyang Liu; Zhen Liu; Jianshu Chen; Lin Xiao; Le Song", "abstract": "Variational inference plays a vital role in learning graphical models, especially on large-scale datasets. Much of its success depends on a proper choice of auxiliary distribution class for posterior approximation. However, how to pursue an auxiliary distribution class that achieves both good approximation ability and computation efficiency remains a core challenge. In this paper, we propose coupled variational Bayes which exploits the primal-dual view of the ELBO with the variational distribution class generated by an optimization procedure, which is termed optimization embedding. This flexible function class couples the variational distribution with the original parameters in the graphical models, allowing end-to-end learning of the graphical models by back-propagation through the variational distribution. 
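The "optimization embedding" can be pictured as follows: a variational sample is produced by drawing from a simple base distribution and then refining it with a few gradient steps on the log-joint, so the refinement steps themselves become part of q. This sketch is ours (the names and the plain gradient-ascent refinement are assumptions, and the paper's construction is more general); in an autodiff framework, gradients flow back through the unrolled steps to the model parameters, which is what couples q to the model.

```python
import numpy as np

def optimization_embedded_sample(grad_log_joint, mu, log_sigma, x, K=5, eta=0.05, rng=None):
    """Reparameterized base draw refined by K gradient-ascent steps on
    log p(x, z); the unrolled steps define an implicit variational q(z|x)."""
    rng = rng or np.random.default_rng(0)
    z = mu + np.exp(log_sigma) * rng.standard_normal(mu.shape)   # base sample
    for _ in range(K):
        z = z + eta * grad_log_joint(x, z)                        # deterministic refinement
    return z
```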
Theoretically, we establish an interesting connection to gradient flow and demonstrate the extreme flexibility of this implicit distribution family in the limit sense. Empirically, we demonstrate the effectiveness of the proposed method on multiple graphical models with either continuous or discrete latent variables compared with state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_6aaba9a1,\n author = {Dai, Bo and Dai, Hanjun and He, Niao and Liu, Weiyang and Liu, Zhen and Chen, Jianshu and Xiao, Lin and Song, Le},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coupled Variational Bayes via Optimization Embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6aaba9a124857622930ca4e50f5afed2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6aaba9a124857622930ca4e50f5afed2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6aaba9a124857622930ca4e50f5afed2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6aaba9a124857622930ca4e50f5afed2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6aaba9a124857622930ca4e50f5afed2-Reviews.html", "metareview": "", "pdf_size": 2099624, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9010555957492755231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Georgia Institute of Technology+Google Brain; Georgia Institute of Technology; University of Illinois at Urbana Champaign; Georgia Institute of Technology; Georgia Institute of Technology; Tencent AI; Microsoft Research; Georgia Institute of Technology+Ant Financial", "aff_domain": "gatech.edu;gatech.edu;illinois.edu;gatech.edu;gatech.edu;tencent.com;microsoft.com;gatech.edu", "email": "gatech.edu;gatech.edu;illinois.edu;gatech.edu;gatech.edu;tencent.com;microsoft.com;gatech.edu", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6aaba9a124857622930ca4e50f5afed2-Abstract.html", "aff_unique_index": "0+1;0;2;0;0;3;4;0+5", "aff_unique_norm": "Georgia Institute of Technology;Google;University of Illinois Urbana-Champaign;Tencent;Microsoft;Ant Financial", "aff_unique_dep": ";Google Brain;;Tencent AI;Microsoft Research;", "aff_unique_url": "https://www.gatech.edu;https://brain.google.com;https://illinois.edu;https://ai.tencent.com;https://www.microsoft.com/en-us/research;https://www.antgroup.com", "aff_unique_abbr": "Georgia Tech;Google Brain;UIUC;Tencent AI;MSR;Ant Financial", "aff_campus_unique_index": "1;2;", "aff_campus_unique": ";Mountain View;Urbana-Champaign", "aff_country_unique_index": "0+0;0;0;0;0;1;0;0+1", "aff_country_unique": "United States;China" }, { "title": "Credit Assignment For Collective Multiagent RL With Global Rewards", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11776", "id": "11776", "author_site": "Duc Thien Nguyen, Akshat Kumar, Hoong Chuin Lau", "author": "Duc Thien Nguyen; Akshat Kumar; Hoong Chuin Lau", "abstract": "Scaling decision-theoretic planning to large multiagent systems is challenging due to uncertainty and partial observability in the environment. 
We focus on a multiagent planning model subclass, relevant to urban settings, where agent interactions are dependent on their ``collective influence'' on each other, rather than their identities. Unlike previous work, we address a general setting where the system reward is not decomposable among agents. We develop collective actor-critic RL approaches for this setting, and address the problems of multiagent credit assignment and of computing low-variance policy gradient estimates that result in faster convergence to high-quality solutions. We also develop difference-rewards-based credit assignment methods for the collective setting. Empirically, our new approaches provide significantly better solutions than previous methods in the presence of global rewards on two real-world problems modeling taxi fleet optimization and multiagent patrolling, as well as a synthetic grid navigation domain.", "bibtex": "@inproceedings{NEURIPS2018_94bb077f,\n author = {Nguyen, Duc Thien and Kumar, Akshat and Lau, Hoong Chuin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Credit Assignment For Collective Multiagent RL With Global Rewards},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/94bb077f18daa6620efa5cf6e6f178d2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/94bb077f18daa6620efa5cf6e6f178d2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/94bb077f18daa6620efa5cf6e6f178d2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/94bb077f18daa6620efa5cf6e6f178d2-Reviews.html", "metareview": "", "pdf_size": 859637, "gs_citation": 133, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4950313076964148668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "School of Information Systems, Singapore Management University; School of Information Systems, Singapore Management University; School of Information Systems, Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "email": "smu.edu.sg;smu.edu.sg;smu.edu.sg", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/94bb077f18daa6620efa5cf6e6f178d2-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "School of Information Systems", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Critical initialisation for deep signal propagation in noisy rectifier neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11557", "id": "11557", "author_site": "Arnu Pretorius, Elan van Biljon, Steve Kroon, Herman Kamper", "author": "Arnu Pretorius; Elan van Biljon; Steve Kroon; Herman Kamper", "abstract": "Stochastic regularisation is an important weapon in the arsenal of a deep learning practitioner. However, despite recent theoretical advances, our understanding of how noise influences signal propagation in deep neural networks remains limited. By extending recent work based on mean field theory, we develop a new framework for signal propagation in stochastic regularised neural networks. 
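Mean-field signal-propagation analyses of this kind iterate a variance map layer by layer, which is easy to simulate. The sketch below is ours, under assumed notation: for a ReLU layer with multiplicative noise of second moment mu2, using E[relu(sqrt(q) z)^2] = q/2 for standard normal z, the recursion q' = sigma_w^2 * mu2 * q/2 + sigma_b^2 keeps the variance bounded only if sigma_w^2 * mu2 / 2 <= 1, placing criticality at sigma_w^2 = 2/mu2, i.e. depending on the noise only through its second moment, consistent with the abstract below.

```python
import numpy as np

def variance_map(q, sigma_w2, sigma_b2, mu2, n_mc=100_000, seed=0):
    """One step of the mean-field pre-activation-variance recursion for a
    noisy ReLU layer: q' = sigma_w^2 * mu2 * E[relu(sqrt(q) z)^2] + sigma_b^2."""
    z = np.random.default_rng(seed).standard_normal(n_mc)
    e_phi2 = np.mean(np.maximum(np.sqrt(q) * z, 0.0) ** 2)   # Monte Carlo E[relu^2]
    return sigma_w2 * mu2 * e_phi2 + sigma_b2
```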
Our \\textit{noisy signal propagation} theory can incorporate several common noise distributions, including additive and multiplicative Gaussian noise as well as dropout. We use this framework to investigate initialisation strategies for noisy ReLU networks. We show that no critical initialisation strategy exists using additive noise, with signal propagation exploding regardless of the selected noise distribution. For multiplicative noise (e.g.\\ dropout), we identify alternative critical initialisation strategies that depend on the second moment of the noise distribution. Simulations and experiments on real-world data confirm that our proposed initialisation is able to stably propagate signals in deep networks, while using an initialisation disregarding noise fails to do so. Furthermore, we analyse correlation dynamics between inputs. Stronger noise regularisation is shown to reduce the depth to which discriminatory information about the inputs to a noisy ReLU network is able to propagate, even when initialised at criticality. We support our theoretical predictions for these trainable depths with simulations, as well as with experiments on MNIST and CIFAR-10.", "bibtex": "@inproceedings{NEURIPS2018_045cf83a,\n author = {Pretorius, Arnu and van Biljon, Elan and Kroon, Steve and Kamper, Herman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Critical initialisation for deep signal propagation in noisy rectifier neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/045cf83ab0722e782cf72d14e44adf98-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/045cf83ab0722e782cf72d14e44adf98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/045cf83ab0722e782cf72d14e44adf98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/045cf83ab0722e782cf72d14e44adf98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/045cf83ab0722e782cf72d14e44adf98-Reviews.html", "metareview": "", "pdf_size": 2465675, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1536287201347762714&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Computer Science Division+CAIR\u2020,Stellenbosch University; Computer Science Division,Stellenbosch University; Computer Science Division,Stellenbosch University; Department of Electrical and Electronic Engineering,Stellenbosch University", "aff_domain": "gmail.com; ; ; ", "email": "gmail.com; ; ; ", "github": "https://github.com/ElanVB/noisy_signal_prop", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/045cf83ab0722e782cf72d14e44adf98-Abstract.html", "aff_unique_index": "0+1;1;1;1", "aff_unique_norm": "University of California, Berkeley;Stellenbosch University", "aff_unique_dep": "Computer Science Division;CAIR", "aff_unique_url": "https://www.cs.berkeley.edu;https://www.sun.ac.za", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Berkeley;;Stellenbosch", "aff_country_unique_index": "0+1;1;1;1", "aff_country_unique": "United States;South Africa" }, { "title": "DAGs with NO TEARS: Continuous Optimization for Structure Learning", "status": "Spotlight", "track": "main", "site": 
"https://nips.cc/virtual/2018/poster/11901", "id": "11901", "author_site": "Xun Zheng, Bryon Aragam, Pradeep Ravikumar, Eric Xing", "author": "Xun Zheng; Bryon Aragam; Pradeep K Ravikumar; Eric P Xing", "abstract": "Estimating the structure of directed acyclic graphs (DAGs, also known as Bayesian networks) is a challenging problem since the search space of DAGs is combinatorial and scales superexponentially with the number of nodes. Existing approaches rely on various local heuristics for enforcing the acyclicity constraint. In this paper, we introduce a fundamentally different strategy: we formulate the structure learning problem as a purely continuous optimization problem over real matrices that avoids this combinatorial constraint entirely. \nThis is achieved by a novel characterization of acyclicity that is not only smooth but also exact. The resulting problem can be efficiently solved by standard numerical algorithms, which also makes implementation effortless. The proposed method outperforms existing ones, without imposing any structural assumptions on the graph such as bounded treewidth or in-degree.", "bibtex": "@inproceedings{NEURIPS2018_e347c514,\n author = {Zheng, Xun and Aragam, Bryon and Ravikumar, Pradeep K and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DAGs with NO TEARS: Continuous Optimization for Structure Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e347c51419ffb23ca3fd5050202f9c3d-Reviews.html", "metareview": "", "pdf_size": 1519331, "gs_citation": 1237, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7128195536288105484&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University+Petuum Inc.", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e347c51419ffb23ca3fd5050202f9c3d-Abstract.html", "aff_unique_index": "0;0;0;0+1", "aff_unique_norm": "Carnegie Mellon University;Petuum Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.petuum.com", "aff_unique_abbr": "CMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "DVAE#: Discrete Variational Autoencoders with Relaxed Boltzmann Priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11199", "id": "11199", "author_site": "Arash Vahdat, Evgeny Andriyash, William Macready", "author": "Arash Vahdat; Evgeny Andriyash; William Macready", "abstract": "Boltzmann machines are powerful distributions that have been shown to be an effective prior over binary latent 
variables in variational autoencoders (VAEs). However, previous methods for training discrete VAEs have used the evidence lower bound and not the tighter importance-weighted bound. We propose two approaches for relaxing Boltzmann machines to continuous distributions that permit training with importance-weighted bounds. These relaxations are based on generalized overlapping transformations and the Gaussian integral trick. Experiments on the MNIST and OMNIGLOT datasets show that these relaxations outperform previous discrete VAEs with Boltzmann priors. An implementation which reproduces these results is available.", "bibtex": "@inproceedings{NEURIPS2018_9f53d83e,\n author = {Vahdat, Arash and Andriyash, Evgeny and Macready, William},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DVAE\\#: Discrete Variational Autoencoders with Relaxed Boltzmann Priors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9f53d83ec0691550f7d2507d57f4f5a2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9f53d83ec0691550f7d2507d57f4f5a2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9f53d83ec0691550f7d2507d57f4f5a2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9f53d83ec0691550f7d2507d57f4f5a2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9f53d83ec0691550f7d2507d57f4f5a2-Reviews.html", "metareview": "", "pdf_size": 827769, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7993310290372230933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Quadrant.ai + D-Wave Systems Inc.; Quadrant.ai + D-Wave Systems Inc.; Quadrant.ai + D-Wave Systems Inc.", "aff_domain": "quadrant.ai;quadrant.ai;quadrant.ai", "email": "quadrant.ai;quadrant.ai;quadrant.ai", "github": "https://github.com/QuadrantAI/dvae", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9f53d83ec0691550f7d2507d57f4f5a2-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "Quadrant.ai;D-Wave Systems Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.quadrant.ai;https://www.dwavesys.com", "aff_unique_abbr": "Quadrant.ai;D-Wave", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0+1;0+1", "aff_country_unique": "United States;Canada" }, { "title": "Data Amplification: A Unified and Competitive Approach to Property Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11843", "id": "11843", "author_site": "Yi Hao, Alon Orlitsky, Ananda Theertha Suresh, Yihong Wu", "author": "Yi Hao; Alon Orlitsky; Ananda Theertha Suresh; Yihong Wu", "abstract": "Estimating properties of discrete distributions is a fundamental problem in statistical learning. We design the first unified, linear-time, competitive property estimator that for a wide class of properties and for all underlying distributions uses just 2n samples to achieve the performance attained by the empirical estimator with n\\sqrt{\\log n} samples. 
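To get a feel for the magnitude of this guarantee (numbers ours, purely illustrative): at $n = 10^6$, the competitive estimator's $2n$ samples match an empirical estimator run on

```latex
n\sqrt{\log n}\,\Big|_{n=10^6} \;=\; 10^6\,\sqrt{\ln 10^6} \;\approx\; 10^6 \times 3.72 \;\approx\; 3.7\times 10^6
```

samples, i.e. close to a twofold effective-data gain beyond the doubling itself.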
This provides off-the-shelf, distribution-independent ``amplification'' of the amount of data available relative to common-practice estimators.", "bibtex": "@inproceedings{NEURIPS2018_a753a435,\n author = {Hao, Yi and Orlitsky, Alon and Suresh, Ananda Theertha and Wu, Yihong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data Amplification: A Unified and Competitive Approach to Property Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a753a43564c29148df3150afb4475440-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a753a43564c29148df3150afb4475440-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a753a43564c29148df3150afb4475440-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a753a43564c29148df3150afb4475440-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a753a43564c29148df3150afb4475440-Reviews.html", "metareview": "", "pdf_size": 643749, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4845126289487542953&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Dept. of Electrical and Computer Engineering, University of California, San Diego; Dept. of Electrical and Computer Engineering, University of California, San Diego; Google Research, New York; Dept. of Statistics and Data Science, Yale University", "aff_domain": "eng.ucsd.edu;eng.ucsd.edu;google.com;yale.edu", "email": "eng.ucsd.edu;eng.ucsd.edu;google.com;yale.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a753a43564c29148df3150afb4475440-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of California, San Diego;Google;Yale University", "aff_unique_dep": "Dept. of Electrical and Computer Engineering;Google Research;Dept. of Statistics and Data Science", "aff_unique_url": "https://www.ucsd.edu;https://research.google;https://www.yale.edu", "aff_unique_abbr": "UCSD;Google;Yale", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "San Diego;New York;New Haven", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data center cooling using model-predictive control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11380", "id": "11380", "author_site": "Nevena Lazic, Craig Boutilier, Tyler Lu, Eehern Wong, Binz Roy, Moonkyung Ryu, Greg Imwalle", "author": "Nevena Lazic; Craig Boutilier; Tyler Lu; Eehern Wong; Binz Roy; MK Ryu; Greg Imwalle", "abstract": "Despite impressive recent advances in reinforcement learning (RL), its deployment in real-world physical systems is often complicated by unexpected events, limited data, and the potential for expensive failures. In this paper, we describe an application of RL \u201cin the wild\u201d to the task of regulating temperatures and airflow inside a large-scale data center (DC). 
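Model-predictive control, named in the title, can be sketched generically: fit a one-step dynamics model to logged data, then at each step plan a short action sequence against the model and execute only its first action. Everything below (the `model.step` interface, the random-shooting planner, the action bounds) is a hypothetical illustration, not the paper's controller.

```python
import numpy as np

def mpc_action(model, state, horizon=10, n_candidates=256, rng=None):
    """Random-shooting MPC: roll candidate action sequences through the
    learned model and return the first action of the cheapest rollout."""
    rng = rng or np.random.default_rng(0)
    best_cost, best_first = np.inf, None
    for _ in range(n_candidates):
        actions = rng.uniform(-1.0, 1.0, size=(horizon, model.action_dim))
        s, cost = state, 0.0
        for a in actions:
            s, c = model.step(s, a)        # learned dynamics + predicted cost
            cost += c
        if cost < best_cost:
            best_cost, best_first = cost, actions[0]
    return best_first                       # replan after the next observation
```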
Adopting a data-driven, model-based approach, we demonstrate that an RL agent with little prior knowledge is able to effectively and safely regulate conditions on a server floor after just a few hours of exploration, while improving operational efficiency relative to existing PID controllers.", "bibtex": "@inproceedings{NEURIPS2018_059fdcd9,\n author = {Lazic, Nevena and Boutilier, Craig and Lu, Tyler and Wong, Eehern and Roy, Binz and Ryu, MK and Imwalle, Greg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data center cooling using model-predictive control},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/059fdcd96baeb75112f09fa1dcc740cc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/059fdcd96baeb75112f09fa1dcc740cc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/059fdcd96baeb75112f09fa1dcc740cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/059fdcd96baeb75112f09fa1dcc740cc-Reviews.html", "metareview": "", "pdf_size": 741427, "gs_citation": 256, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14828906815888446302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Google Research; Google Research; Google Research; Google Research; Google Cloud; Google Cloud; Google Cloud", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/059fdcd96baeb75112f09fa1dcc740cc-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data-Driven Clustering via Parameterized Lloyd's Families", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12006", "id": "12006", "author_site": "Maria-Florina Balcan, Travis Dick, Colin White", "author": "Maria-Florina F Balcan; Travis Dick; Colin White", "abstract": "Designing algorithms for clustering points in metric spaces is a long-studied area of research. Clustering has seen a multitude of work both theoretically, in understanding the approximation guarantees possible for many objective functions such as k-median and k-means clustering, and experimentally, in finding the fastest algorithms and seeding procedures for Lloyd's algorithm. The performance of a given clustering algorithm depends on the specific application at hand, and this may not be known up front. For example, a \"typical instance\" may vary depending on the application, and different clustering heuristics perform differently depending on the instance.", "bibtex": "@inproceedings{NEURIPS2018_128ac9c4,\n author = {Balcan, Maria-Florina F and Dick, Travis and White, Colin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data-Driven Clustering via Parameterized Lloyd\\textquotesingle s Families},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/128ac9c427302b7a64314fc4593430b2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/128ac9c427302b7a64314fc4593430b2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/128ac9c427302b7a64314fc4593430b2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/128ac9c427302b7a64314fc4593430b2-Reviews.html", "metareview": "", "pdf_size": 488194, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13127720677020938343&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, Carnegie-Mellon University, Pittsburgh, PA 15213; Department of Computer Science, Carnegie-Mellon University, Pittsburgh, PA 15213; Department of Computer Science, Carnegie-Mellon University, Pittsburgh, PA 15213", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/128ac9c427302b7a64314fc4593430b2-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Data-Efficient Hierarchical Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11333", "id": "11333", "author_site": "Ofir Nachum, Shixiang (Shane) Gu, Honglak Lee, Sergey Levine", "author": "Ofir Nachum; Shixiang (Shane) Gu; Honglak Lee; Sergey Levine", "abstract": "Hierarchical reinforcement learning (HRL) is a promising approach to extend traditional reinforcement learning (RL) methods to solve more complex tasks. Yet, the majority of current HRL methods require careful task-specific design and on-policy training, making them difficult to apply in real-world scenarios. In this paper, we study how we can develop HRL algorithms that are general, in that they do not make onerous additional assumptions beyond standard RL algorithms, and efficient, in the sense that they can be used with modest numbers of interaction samples, making them suitable for real-world problems such as robotic control. For generality, we develop a scheme where lower-level controllers are supervised with goals that are learned and proposed automatically by the higher-level controllers. To address efficiency, we propose to use off-policy experience for both higher- and lower-level training. This poses a considerable challenge, since changes to the lower-level behaviors change the action space for the higher-level policy, and we introduce an off-policy correction to remedy this challenge. This allows us to take advantage of recent advances in off-policy model-free RL to learn both higher and lower-level policies using substantially fewer environment interactions than on-policy algorithms. We find that our resulting HRL agent is generally applicable and highly sample-efficient. 
Our experiments show that our method can be used to learn highly complex behaviors for simulated robots, such as pushing objects and utilizing them to reach target locations, learning from only a few million samples, equivalent to a few days of real-time interaction. In comparisons with a number of prior HRL methods, we find that our approach substantially outperforms previous state-of-the-art techniques.", "bibtex": "@inproceedings{NEURIPS2018_e6384711,\n author = {Nachum, Ofir and Gu, Shixiang (Shane) and Lee, Honglak and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data-Efficient Hierarchical Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e6384711491713d29bc63fc5eeb5ba4f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e6384711491713d29bc63fc5eeb5ba4f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e6384711491713d29bc63fc5eeb5ba4f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e6384711491713d29bc63fc5eeb5ba4f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e6384711491713d29bc63fc5eeb5ba4f-Reviews.html", "metareview": "", "pdf_size": 2933026, "gs_citation": 1158, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8228365515476642671&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Google Brain; Google Brain + University of Cambridge + Max Planck Institute of Intelligent Systems; Google Brain; Google Brain + UC Berkeley", "aff_domain": "google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com", "github": "https://github.com/tensorflow/models/tree/master/research/efficient-hrl", "project": "https://sites.google.com/view/efficient-hrl", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e6384711491713d29bc63fc5eeb5ba4f-Abstract.html", "aff_unique_index": "0;0+1+2;0;0+3", "aff_unique_norm": "Google;University of Cambridge;Max Planck Institute of Intelligent Systems;University of California, Berkeley", "aff_unique_dep": "Google Brain;;Intelligent Systems;", "aff_unique_url": "https://brain.google.com;https://www.cam.ac.uk;https://www.mpi-is.mpg.de;https://www.berkeley.edu", "aff_unique_abbr": "Google Brain;Cambridge;MPI-IS;UC Berkeley", "aff_campus_unique_index": "0;0+1;0;0+3", "aff_campus_unique": "Mountain View;Cambridge;;Berkeley", "aff_country_unique_index": "0;0+1+2;0;0+0", "aff_country_unique": "United States;United Kingdom;Germany" }, { "title": "Data-dependent PAC-Bayes priors via differential privacy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11806", "id": "11806", "author_site": "Gintare Karolina Dziugaite, Daniel Roy", "author": "Gintare Karolina Dziugaite; Daniel M. Roy", "abstract": "The Probably Approximately Correct (PAC) Bayes framework (McAllester, 1999) can incorporate knowledge about the learning algorithm and (data) distribution through the use of distribution-dependent priors, yielding tighter generalization bounds on data-dependent posteriors. Using this flexibility, however, is difficult, especially when the data distribution is presumed to be unknown. 
We show how a differentially private data-dependent prior yields a valid PAC-Bayes bound, and then show how non-private mechanisms for choosing priors can also yield generalization bounds. As an application of this result, we show that a Gaussian prior mean chosen via stochastic gradient Langevin dynamics (SGLD; Welling and Teh, 2011) leads to a valid PAC-Bayes bound due to control of the 2-Wasserstein distance to a differentially private stationary distribution. We study our data-dependent bounds empirically, and show that they can be nonvacuous even when other distribution-dependent bounds are vacuous.", "bibtex": "@inproceedings{NEURIPS2018_9a0ee0a9,\n author = {Dziugaite, Gintare Karolina and Roy, Daniel M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data-dependent PAC-Bayes priors via differential privacy},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Reviews.html", "metareview": "", "pdf_size": 501936, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12635796025191751775&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "University of Cambridge + Element AI; University of Toronto + Vector Institute", "aff_domain": "cl.cam.ac.uk;utoronto.ca", "email": "cl.cam.ac.uk;utoronto.ca", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9a0ee0a9e7a42d2d69b8f86b3a0756b1-Abstract.html", "aff_unique_index": "0+1;2+3", "aff_unique_norm": "University of Cambridge;Element AI;University of Toronto;Vector Institute", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.elementai.com;https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "Cambridge;Element AI;U of T;Vector Institute", "aff_campus_unique_index": "0;", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0+1;1+1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Decentralize and Randomize: Faster Algorithm for Wasserstein Barycenters", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12017", "id": "12017", "author_site": "Pavel Dvurechenskii, Darina Dvinskikh, Alexander Gasnikov, Cesar Uribe, Angelia Nedich", "author": "Pavel Dvurechenskii; Darina Dvinskikh; Alexander Gasnikov; Cesar Uribe; Angelia Nedich", "abstract": "We study the decentralized distributed computation of discrete approximations for the regularized Wasserstein barycenter of a finite set of continuous probability measures distributedly stored over a network. We assume there is a network of agents/machines/computers, and each agent holds a private continuous probability measure and seeks to compute the barycenter of all the measures in the network by getting samples from its local measure and exchanging information with its neighbors. 
Motivated by this problem, we develop and analyze a novel accelerated primal-dual stochastic gradient method for general stochastic convex optimization problems with linear equality constraints. Then, we apply this method to the decentralized distributed optimization setting to obtain a new algorithm for the distributed semi-discrete regularized Wasserstein barycenter problem. Moreover, we show explicit non-asymptotic complexity for the proposed algorithm. Finally, we show the effectiveness of our method on the distributed computation of the regularized Wasserstein barycenter of univariate Gaussian and von Mises distributions, as well as some applications to image aggregation.", "bibtex": "@inproceedings{NEURIPS2018_161882dd,\n author = {Dvurechenskii, Pavel and Dvinskikh, Darina and Gasnikov, Alexander and Uribe, Cesar and Nedich, Angelia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Decentralize and Randomize: Faster Algorithm for Wasserstein Barycenters},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/161882dd2d19c716819081aee2c08b98-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/161882dd2d19c716819081aee2c08b98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/161882dd2d19c716819081aee2c08b98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/161882dd2d19c716819081aee2c08b98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/161882dd2d19c716819081aee2c08b98-Reviews.html", "metareview": "", "pdf_size": 611970, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5576098960538257478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Weierstrass Institute for Applied Analysis and Stochastics + Institute for Information Transmission Problems RAS; Weierstrass Institute for Applied Analysis and Stochastics + Institute for Information Transmission Problems RAS; Moscow Institute of Physics and Technology + Institute for Information Transmission Problems RAS; Massachusetts Institute of Technology; Arizona State University + Moscow Institute of Physics and Technology", "aff_domain": "wias-berlin.de;wias-berlin.de;yandex.ru;mit.edu;asu.edu", "email": "wias-berlin.de;wias-berlin.de;yandex.ru;mit.edu;asu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/161882dd2d19c716819081aee2c08b98-Abstract.html", "aff_unique_index": "0+1;0+1;2+1;3;4+2", "aff_unique_norm": "Weierstrass Institute for Applied Analysis and Stochastics;Institute for Information Transmission Problems;Moscow Institute of Physics and Technology;Massachusetts Institute of Technology;Arizona State University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.wias-berlin.de/;http://www.iitp.ru;https://www.mipt.ru/en;https://web.mit.edu;https://www.asu.edu", "aff_unique_abbr": "WIAS;IITP RAS;MIPT;MIT;ASU", "aff_campus_unique_index": ";;;", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0+1;1+1;2;2+1", "aff_country_unique": "Germany;Russian Federation;United States" }, { "title": "Deep Anomaly Detection Using Geometric Transformations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11927", "id": "11927", "author_site": "Izhak 
Golan, Ran El-Yaniv", "author": "Izhak Golan; Ran El-Yaniv", "abstract": "We consider the problem of anomaly detection in images, and present a new detection technique. Given a sample of images, all known to belong to a ``normal'' class (e.g., dogs), we show how to train a deep neural model that can detect out-of-distribution images (i.e., non-dog objects). The main idea behind our scheme is to train a multi-class model to discriminate between dozens of geometric transformations applied on all the given images. The auxiliary expertise learned by the model generates feature detectors that effectively identify, at test time, anomalous images based on the softmax activation statistics of the model when applied on transformed images. We present extensive experiments using the proposed detector, which indicate that our algorithm improves state-of-the-art methods by a wide margin.", "bibtex": "@inproceedings{NEURIPS2018_5e62d03a,\n author = {Golan, Izhak and El-Yaniv, Ran},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Anomaly Detection Using Geometric Transformations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5e62d03aec0d17facfc5355dd90d441c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5e62d03aec0d17facfc5355dd90d441c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5e62d03aec0d17facfc5355dd90d441c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5e62d03aec0d17facfc5355dd90d441c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5e62d03aec0d17facfc5355dd90d441c-Reviews.html", "metareview": "", "pdf_size": 329876, "gs_citation": 843, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15277146675093535725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, Technion \u2013 Israel Institute of Technology, Haifa, Israel; Department of Computer Science, Technion \u2013 Israel Institute of Technology, Haifa, Israel", "aff_domain": "cs.technion.ac.il;cs.technion.ac.il", "email": "cs.technion.ac.il;cs.technion.ac.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5e62d03aec0d17facfc5355dd90d441c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Technion \u2013 Israel Institute of Technology", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Haifa", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Deep Attentive Tracking via Reciprocative Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11205", "id": "11205", "author_site": "Shi Pu, YIBING SONG, Chao Ma, Honggang Zhang, Ming-Hsuan Yang", "author": "Shi Pu; Yibing Song; Chao Ma; Honggang Zhang; Ming-Hsuan Yang", "abstract": "Visual attention, derived from cognitive neuroscience, facilitates human perception on the most pertinent subset of the sensory data. Recently, significant efforts have been made to exploit attention schemes to advance computer vision systems. 
For visual tracking, it is often challenging to track target objects undergoing large appearance changes. Attention maps facilitate visual tracking by selectively paying attention to temporal robust features. Existing tracking-by-detection approaches mainly use additional attention modules to generate feature weights as the classifiers are not equipped with such mechanisms. In this paper, we propose a reciprocative learning algorithm to exploit visual attention for training deep classifiers. The proposed algorithm consists of feed-forward and backward operations to generate attention maps, which serve as regularization terms coupled with the original classification loss function for training. The deep classifier learns to attend to the regions of target objects robust to appearance changes. Extensive experiments on large-scale benchmark datasets show that the proposed attentive tracking method performs favorably against the state-of-the-art approaches.", "bibtex": "@inproceedings{NEURIPS2018_c32d9bf2,\n author = {Pu, Shi and Song, Yibing and Ma, Chao and Zhang, Honggang and Yang, Ming-Hsuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Attentive Tracking via Reciprocative Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c32d9bf27a3da7ec8163957080c8628e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c32d9bf27a3da7ec8163957080c8628e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c32d9bf27a3da7ec8163957080c8628e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c32d9bf27a3da7ec8163957080c8628e-Reviews.html", "metareview": "", "pdf_size": 1561394, "gs_citation": 225, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15557244319439286179&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Beijing University of Posts and Telecommunications; Tencent AI Lab; Shanghai Jiao Tong University; Beijing University of Posts and Telecommunications; University of California at Merced", "aff_domain": "bupt.edu.cn;bupt.edu.cn;gmail.com;sjtu.edu.cn;ucmerced.edu", "email": "bupt.edu.cn;bupt.edu.cn;gmail.com;sjtu.edu.cn;ucmerced.edu", "github": "https://ybsong00.github.io/nips18_tracking/index", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c32d9bf27a3da7ec8163957080c8628e-Abstract.html", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Tencent;Shanghai Jiao Tong University;University of California, Merced", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://ai.tencent.com;https://www.sjtu.edu.cn;https://www.ucmerced.edu", "aff_unique_abbr": "BUPT;Tencent AI Lab;SJTU;UC Merced", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Beijing;;Merced", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Deep Defense: Training DNNs with Improved Adversarial Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11066", "id": "11066", "author_site": "Ziang Yan, Yiwen Guo, Changshui Zhang", "author": "Ziang Yan; Yiwen Guo; Changshui Zhang", "abstract": "Despite the efficacy on a variety of computer vision tasks, 
deep neural networks (DNNs) are vulnerable to adversarial attacks, limiting their applications in security-critical systems. Recent works have shown the possibility of generating imperceptibly perturbed image inputs (a.k.a., adversarial examples) to fool well-trained DNN classifiers into making arbitrary predictions. To address this problem, we propose a training recipe named \"deep defense\". Our core idea is to integrate an adversarial perturbation-based regularizer into the classification objective, such that the obtained models learn to resist potential attacks, directly and precisely. The whole optimization problem is solved just like training a recursive network. Experimental results demonstrate that our method outperforms training with adversarial/Parseval regularizations by large margins on various datasets (including MNIST, CIFAR-10 and ImageNet) and different DNN architectures. Code and models for reproducing our results are available at https://github.com/ZiangYan/deepdefense.pytorch.", "bibtex": "@inproceedings{NEURIPS2018_8f121ce0,\n author = {Yan, Ziang and Guo, Yiwen and Zhang, Changshui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Defense: Training DNNs with Improved Adversarial Robustness},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8f121ce07d74717e0b1f21d122e04521-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8f121ce07d74717e0b1f21d122e04521-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8f121ce07d74717e0b1f21d122e04521-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8f121ce07d74717e0b1f21d122e04521-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8f121ce07d74717e0b1f21d122e04521-Reviews.html", "metareview": "", "pdf_size": 483071, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6643757979178770669&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Institute for Arti\ufb01cial Intelligence, Tsinghua University (THUAI), State Key Lab of Intelligent Technologies and Systems, Beijing National Research Center for Information Science and Technology (BNRist), Department of Automation, Tsinghua University, Beijing, China+Intel Labs China; Intel Labs China; Institute for Arti\ufb01cial Intelligence, Tsinghua University (THUAI), State Key Lab of Intelligent Technologies and Systems, Beijing National Research Center for Information Science and Technology (BNRist), Department of Automation, Tsinghua University, Beijing, China", "aff_domain": "mails.tsinghua.edu.cn;intel.com;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;intel.com;mail.tsinghua.edu.cn", "github": "https://github.com/ZiangYan/deepdefense.pytorch", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8f121ce07d74717e0b1f21d122e04521-Abstract.html", "aff_unique_index": "0+1;1;0", "aff_unique_norm": "Tsinghua University;Intel", "aff_unique_dep": "Institute for Arti\ufb01cial Intelligence;Intel Labs", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.intel.cn", "aff_unique_abbr": "THU;Intel", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "China" }, { "title": "Deep 
Dynamical Modeling and Control of Unsteady Fluid Flows", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11882", "id": "11882", "author_site": "Jeremy Morton, Antony Jameson, Mykel J Kochenderfer, Freddie Witherden", "author": "Jeremy Morton; Antony Jameson; Mykel J Kochenderfer; Freddie Witherden", "abstract": "The design of flow control systems remains a challenge due to the nonlinear nature of the equations that govern fluid flow. However, recent advances in computational fluid dynamics (CFD) have enabled the simulation of complex fluid flows with high accuracy, opening the possibility of using learning-based approaches to facilitate controller design. We present a method for learning the forced and unforced dynamics of airflow over a cylinder directly from CFD data. The proposed approach, grounded in Koopman theory, is shown to produce stable dynamical models that can predict the time evolution of the cylinder system over extended time horizons. Finally, by performing model predictive control with the learned dynamical models, we are able to find a straightforward, interpretable control law for suppressing vortex shedding in the wake of the cylinder.", "bibtex": "@inproceedings{NEURIPS2018_2b0aa0d9,\n author = {Morton, Jeremy and Jameson, Antony and Kochenderfer, Mykel J and Witherden, Freddie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Dynamical Modeling and Control of Unsteady Fluid Flows},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2b0aa0d9e30ea3a55fc271ced8364536-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2b0aa0d9e30ea3a55fc271ced8364536-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2b0aa0d9e30ea3a55fc271ced8364536-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2b0aa0d9e30ea3a55fc271ced8364536-Reviews.html", "metareview": "", "pdf_size": 1140793, "gs_citation": 216, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8193012965395960760&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Aeronautics and Astronautics, Stanford University; Department of Aeronautics and Astronautics, Stanford University; Department of Aerospace Engineering, Texas A&M University; Department of Aeronautics and Astronautics, Stanford University", "aff_domain": "stanford.edu;stanford.edu;tamu.edu;stanford.edu", "email": "stanford.edu;stanford.edu;tamu.edu;stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2b0aa0d9e30ea3a55fc271ced8364536-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Texas A&M University", "aff_unique_dep": "Department of Aeronautics and Astronautics;Department of Aerospace Engineering", "aff_unique_url": "https://www.stanford.edu;https://www.tamu.edu", "aff_unique_abbr": "Stanford;TAMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Functional Dictionaries: Learning Consistent Semantic Structures on 3D Models from Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11072", "id": "11072", 
"author_site": "Minhyuk Sung, Hao Su, Ronald Yu, Leonidas Guibas", "author": "Minhyuk Sung; Hao Su; Ronald Yu; Leonidas Guibas", "abstract": "Various 3D semantic attributes such as segmentation masks, geometric features, keypoints, and materials can be encoded as per-point probe functions on 3D geometries. Given a collection of related 3D shapes, we consider how to jointly analyze such probe functions over different shapes, and how to discover common latent structures using a neural network \u2014 even in the absence of any correspondence information. Our network is trained on point cloud representations of shape geometry and associated semantic functions on that point cloud. These functions express a shared semantic understanding of the shapes but are not coordinated in any way. For example, in a segmentation task, the functions can be indicator functions of arbitrary sets of shape parts, with the particular combination involved not known to the network. Our network is able to produce a small dictionary of basis functions for each shape, a dictionary whose span includes the semantic functions provided for that shape. Even though our shapes have independent discretizations and no functional correspondences are provided, the network is able to generate latent bases, in a consistent order, that reflect the shared semantic structure among the shapes. We demonstrate the effectiveness of our technique in various segmentation and keypoint selection applications.", "bibtex": "@inproceedings{NEURIPS2018_49182f81,\n author = {Sung, Minhyuk and Su, Hao and Yu, Ronald and Guibas, Leonidas J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Functional Dictionaries: Learning Consistent Semantic Structures on 3D Models from Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/49182f81e6a13cf5eaa496d51fea6406-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/49182f81e6a13cf5eaa496d51fea6406-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/49182f81e6a13cf5eaa496d51fea6406-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/49182f81e6a13cf5eaa496d51fea6406-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/49182f81e6a13cf5eaa496d51fea6406-Reviews.html", "metareview": "", "pdf_size": 6071022, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9622270934005244916&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Stanford University; University of California San Diego; University of California San Diego; Stanford University", "aff_domain": "cs.stanford.edu;eng.ucsd.edu;ucsd.edu;cs.stanford.edu", "email": "cs.stanford.edu;eng.ucsd.edu;ucsd.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/49182f81e6a13cf5eaa496d51fea6406-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Stanford University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://ucsd.edu", "aff_unique_abbr": "Stanford;UCSD", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Stanford;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": 
"Deep Generative Markov State Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11395", "id": "11395", "author_site": "Hao Wu, Andreas Mardt, Luca Pasquali, Frank Noe", "author": "Hao Wu; Andreas Mardt; Luca Pasquali; Frank Noe", "abstract": "We propose a deep generative Markov State Model (DeepGenMSM) learning framework for inference of metastable dynamical systems and prediction of trajectories. After unsupervised training on time series data, the model contains (i) a probabilistic encoder that maps from high-dimensional configuration space to a small-sized vector indicating the membership to metastable (long-lived) states, (ii) a Markov chain that governs the transitions between metastable states and facilitates analysis of the long-time dynamics, and (iii) a generative part that samples the conditional distribution of configurations in the next time step. The model can be operated in a recursive fashion to generate trajectories to predict the system evolution from a defined starting state and propose new configurations. The DeepGenMSM is demonstrated to provide accurate estimates of the long-time kinetics and generate valid distributions for molecular dynamics (MD) benchmark systems. Remarkably, we show that DeepGenMSMs are able to make long time-steps in molecular configuration space and generate physically realistic structures in regions that were not seen in training data.", "bibtex": "@inproceedings{NEURIPS2018_deb54ffb,\n author = {Wu, Hao and Mardt, Andreas and Pasquali, Luca and Noe, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Generative Markov State Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/deb54ffb41e085fd7f69a75b6359c989-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/deb54ffb41e085fd7f69a75b6359c989-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/deb54ffb41e085fd7f69a75b6359c989-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/deb54ffb41e085fd7f69a75b6359c989-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/deb54ffb41e085fd7f69a75b6359c989-Reviews.html", "metareview": "", "pdf_size": 3048931, "gs_citation": 83, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12810935521124083676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Dept. of Mathematics and Computer Science, Freie Universit\u00e4t Berlin, 14195 Berlin, Germany+School of Mathematical Sciences, Tongji University, Shanghai, 200092, P.R. China; Dept. of Mathematics and Computer Science, Freie Universit\u00e4t Berlin, 14195 Berlin, Germany; Dept. of Mathematics and Computer Science, Freie Universit\u00e4t Berlin, 14195 Berlin, Germany; Dept. of Mathematics and Computer Science, Freie Universit\u00e4t Berlin, 14195 Berlin, Germany", "aff_domain": "fu-berlin.de;fu-berlin.de;fu-berlin.de;fu-berlin.de", "email": "fu-berlin.de;fu-berlin.de;fu-berlin.de;fu-berlin.de", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/deb54ffb41e085fd7f69a75b6359c989-Abstract.html", "aff_unique_index": "0+1;0;0;0", "aff_unique_norm": "Freie Universit\u00e4t Berlin;Tongji University", "aff_unique_dep": "Dept. 
of Mathematics and Computer Science;School of Mathematical Sciences", "aff_unique_url": "https://www.fu-berlin.de;https://www.tongji.edu.cn", "aff_unique_abbr": "FU Berlin;Tongji", "aff_campus_unique_index": "0+1;0;0;0", "aff_campus_unique": "Berlin;Shanghai", "aff_country_unique_index": "0+1;0;0;0", "aff_country_unique": "Germany;China" }, { "title": "Deep Generative Models for Distribution-Preserving Lossy Compression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11576", "id": "11576", "author_site": "Michael Tschannen, Eirikur Agustsson, Mario Lucic", "author": "Michael Tschannen; Eirikur Agustsson; Mario Lucic", "abstract": "We propose and study the problem of distribution-preserving lossy compression. Motivated by recent advances in extreme image compression which allow to maintain artifact-free reconstructions even at very low bitrates, we propose to optimize the rate-distortion tradeoff under the constraint that the reconstructed samples follow the distribution of the training data. The resulting compression system recovers both ends of the spectrum: On one hand, at zero bitrate it learns a generative model of the data, and at high enough bitrates it achieves perfect reconstruction. Furthermore, for intermediate bitrates it smoothly interpolates between learning a generative model of the training data and perfectly reconstructing the training samples. We study several methods to approximately solve the proposed optimization problem, including a novel combination of Wasserstein GAN and Wasserstein Autoencoder, and present an extensive theoretical and empirical characterization of the proposed compression systems.", "bibtex": "@inproceedings{NEURIPS2018_801fd8c2,\n author = {Tschannen, Michael and Agustsson, Eirikur and Lucic, Mario},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Generative Models for Distribution-Preserving Lossy Compression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/801fd8c2a4e79c1d24a40dc735c051ae-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/801fd8c2a4e79c1d24a40dc735c051ae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/801fd8c2a4e79c1d24a40dc735c051ae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/801fd8c2a4e79c1d24a40dc735c051ae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/801fd8c2a4e79c1d24a40dc735c051ae-Reviews.html", "metareview": "", "pdf_size": 883175, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10590142637711882209&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "ETH Z\u00fcrich; Google AI Perception; Google Brain", "aff_domain": "nari.ee.ethz.ch;google.com;google.com", "email": "nari.ee.ethz.ch;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/801fd8c2a4e79c1d24a40dc735c051ae-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;Google", "aff_unique_dep": ";Google AI Perception", "aff_unique_url": "https://www.ethz.ch;https://ai.google", "aff_unique_abbr": "ETHZ;Google AI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Deep Generative Models with Learnable Knowledge Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11993", "id": "11993", "author_site": "Zhiting Hu, Zichao Yang, Russ Salakhutdinov, LIANHUI Qin, Xiaodan Liang, Haoye Dong, Eric Xing", "author": "Zhiting Hu; Zichao Yang; Ruslan Salakhutdinov; LIANHUI Qin; Xiaodan Liang; Haoye Dong; Eric P Xing", "abstract": "The broad set of deep generative models (DGMs) has achieved remarkable advances. However, it is often difficult to incorporate rich structured domain knowledge with the end-to-end DGMs. Posterior regularization (PR) offers a principled framework to impose structured constraints on probabilistic models, but has limited applicability to the diverse DGMs that can lack a Bayesian formulation or even explicit density evaluation. PR also requires constraints to be fully specified {\\it a priori}, which is impractical or suboptimal for complex knowledge with learnable uncertain parts. In this paper, we establish mathematical correspondence between PR and reinforcement learning (RL), and, based on the connection, expand PR to learn constraints as the extrinsic reward in RL. The resulting algorithm is model-agnostic to apply to any DGMs, and is flexible to adapt arbitrary constraints with the model jointly. Experiments on human image generation and templated sentence generation show models with learned knowledge constraints by our algorithm greatly improve over base generative models.", "bibtex": "@inproceedings{NEURIPS2018_d7e77c83,\n author = {Hu, Zhiting and Yang, Zichao and Salakhutdinov, Russ R and Qin, LIANHUI and Liang, Xiaodan and Dong, Haoye and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Generative Models with Learnable Knowledge Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d7e77c835af3d2a803c1cf28d60575bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d7e77c835af3d2a803c1cf28d60575bc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d7e77c835af3d2a803c1cf28d60575bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d7e77c835af3d2a803c1cf28d60575bc-Reviews.html", "metareview": "", "pdf_size": 1768259, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16389072575294747722&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Petuum Inc.", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;petuum.com; ; ", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;petuum.com; ; ", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d7e77c835af3d2a803c1cf28d60575bc-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Petuum Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.petuum.com", "aff_unique_abbr": "CMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deep Homogeneous Mixture Models: Representation, Separation, and Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11687", "id": "11687", "author_site": "Priyank Jaini, Pascal Poupart, Yaoliang Yu", "author": "Priyank Jaini; Pascal Poupart; Yaoliang Yu", "abstract": "At their core, many unsupervised learning models provide a compact representation of homogeneous density mixtures, but their similarities and differences are not always clearly understood. In this work, we formally establish the relationships among latent tree graphical models (including special cases such as hidden Markov models and tensorial mixture models), hierarchical tensor formats and sum-product networks. Based on this connection, we then give a unified treatment of exponential separation in \\emph{exact} representation size between deep mixture architectures and shallow ones. In contrast, for \\emph{approximate} representation, we show that the conditional gradient algorithm can approximate any homogeneous mixture within $\\epsilon$ accuracy by combining $O(1/\\epsilon^2)$ ``shallow'' architectures, where the hidden constant may decrease (exponentially) with respect to the depth. Our experiments on both synthetic and real datasets confirm the benefits of depth in density estimation.", "bibtex": "@inproceedings{NEURIPS2018_c5f5c23b,\n author = {Jaini, Priyank and Poupart, Pascal and Yu, Yaoliang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Homogeneous Mixture Models: Representation, Separation, and Approximation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5f5c23be1b71adb51ea9dc8e9d444a8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5f5c23be1b71adb51ea9dc8e9d444a8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c5f5c23be1b71adb51ea9dc8e9d444a8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5f5c23be1b71adb51ea9dc8e9d444a8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5f5c23be1b71adb51ea9dc8e9d444a8-Reviews.html", "metareview": "", "pdf_size": 530489, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14183575001112622215&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science & Waterloo AI Institute, University of Waterloo; University of Waterloo, Vector Institute & Waterloo AI Institute; Department of Computer Science & Waterloo AI Institute, University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5f5c23be1b71adb51ea9dc8e9d444a8-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Waterloo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Deep Network for the Integrated 3D Sensing of Multiple People in Natural Images", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11804", "id": "11804", "author_site": "Andrei Zanfir, Elisabeta Marinoiu, Mihai Zanfir, Alin-Ionut Popa, Cristian Sminchisescu", "author": "Andrei Zanfir; Elisabeta Marinoiu; Mihai Zanfir; Alin-Ionut Popa; Cristian Sminchisescu", "abstract": "We present MubyNet -- a feed-forward, multitask, bottom up system for the integrated localization, as well as 3d pose and shape estimation, of multiple people in monocular images. The challenge is the formal modeling of the problem that intrinsically requires discrete and continuous computation, e.g. grouping people vs. predicting 3d pose. The model identifies human body structures (joints and limbs) in images, groups them based on 2d and 3d information fused using learned scoring functions, and optimally aggregates such responses into partial or complete 3d human skeleton hypotheses under kinematic tree constraints, but without knowing in advance the number of people in the scene and their visibility relations. We design a multi-task deep neural network with differentiable stages where the person grouping problem is formulated as an integer program based on learned body part scores parameterized by both 2d and 3d information. This avoids suboptimality resulting from separate 2d and 3d reasoning, with grouping performed based on the combined representation. The final stage of 3d pose and shape prediction is based on a learned attention process where information from different human body parts is optimally integrated. 
State-of-the-art results are obtained in large scale datasets like Human3.6M and Panoptic, and qualitatively by reconstructing the 3d shape and pose of multiple people, under occlusion, in difficult monocular images.", "bibtex": "@inproceedings{NEURIPS2018_6a6610fe,\n author = {Zanfir, Andrei and Marinoiu, Elisabeta and Zanfir, Mihai and Popa, Alin-Ionut and Sminchisescu, Cristian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Network for the Integrated 3D Sensing of Multiple People in Natural Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a6610feab86a1f294dbbf5855c74af9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a6610feab86a1f294dbbf5855c74af9-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a6610feab86a1f294dbbf5855c74af9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a6610feab86a1f294dbbf5855c74af9-Reviews.html", "metareview": "", "pdf_size": 3232858, "gs_citation": 170, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7635797151927012288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Institute of Mathematics of the Romanian Academy; Institute of Mathematics of the Romanian Academy; Institute of Mathematics of the Romanian Academy; Institute of Mathematics of the Romanian Academy; Department of Mathematics, Faculty of Engineering, Lund University + Institute of Mathematics of the Romanian Academy", "aff_domain": "imar.ro;imar.ro;imar.ro;imar.ro;math.lth.se", "email": "imar.ro;imar.ro;imar.ro;imar.ro;math.lth.se", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a6610feab86a1f294dbbf5855c74af9-Abstract.html", "aff_unique_index": "0;0;0;0;1+0", "aff_unique_norm": "Romanian Academy;Lund University", "aff_unique_dep": "Institute of Mathematics;Department of Mathematics", "aff_unique_url": "https://www.math.ro/;https://www.lunduniversity.lu.se", "aff_unique_abbr": "IMAR;LU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1+0", "aff_country_unique": "Romania;Sweden" }, { "title": "Deep Neural Nets with Interpolating Function as Output Activation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11097", "id": "11097", "author_site": "Bao Wang, Xiyang Luo, Zhen Li, Wei Zhu, Zuoqiang Shi, Stanley Osher", "author": "Bao Wang; Xiyang Luo; Zhen Li; Wei Zhu; Zuoqiang Shi; Stanley Osher", "abstract": "We replace the output layer of deep neural nets, typically the softmax function, by a novel interpolating function. And we propose end-to-end training and testing algorithms for this new architecture. Compared to classical neural nets with softmax function as output activation, the surrogate with interpolating function as output activation combines advantages of both deep and manifold learning. The new framework demonstrates the following major advantages: First, it is better applicable to the case with insufficient training data. Second, it significantly improves the generalization accuracy on a wide variety of networks. 
The algorithm is implemented in PyTorch, and the code is available at https://github.com/BaoWangMath/DNN-DataDependentActivation.", "bibtex": "@inproceedings{NEURIPS2018_6ecbdd6e,\n author = {Wang, Bao and Luo, Xiyang and Li, Zhen and Zhu, Wei and Shi, Zuoqiang and Osher, Stanley},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Neural Nets with Interpolating Function as Output Activation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6ecbdd6ec859d284dc13885a37ce8d81-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6ecbdd6ec859d284dc13885a37ce8d81-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6ecbdd6ec859d284dc13885a37ce8d81-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6ecbdd6ec859d284dc13885a37ce8d81-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6ecbdd6ec859d284dc13885a37ce8d81-Reviews.html", "metareview": "", "pdf_size": 476127, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11891617585476435076&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Mathematics, University of California, Los Angeles; Department of Mathematics, University of California, Los Angeles; Department of Mathematics, HKUST, Hong Kong; Department of Mathematics, Duke University; Department of Mathematics, Tsinghua University; Department of Mathematics, University of California, Los Angeles", "aff_domain": "gmail.com;gmail.com;gmail.com;math.duke.edu;mail.tsinghua.edu.cn;math.ucla.edu", "email": "gmail.com;gmail.com;gmail.com;math.duke.edu;mail.tsinghua.edu.cn;math.ucla.edu", "github": "https://github.com/BaoWangMath/DNN-DataDependentActivation", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6ecbdd6ec859d284dc13885a37ce8d81-Abstract.html", "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "University of California, Los Angeles;Hong Kong University of Science and Technology;Duke University;Tsinghua University", "aff_unique_dep": "Department of Mathematics;Department of Mathematics;Department of Mathematics;Department of Mathematics", "aff_unique_url": "https://www.ucla.edu;https://www.hkust.edu.hk;https://www.duke.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UCLA;HKUST;Duke;THU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Los Angeles;Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Deep Neural Networks with Box Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11602", "id": "11602", "author_site": "Egor Burkov, Victor Lempitsky", "author": "Egor Burkov; Victor Lempitsky", "abstract": "Box filters computed using integral images have been part of the computer vision toolset for a long time. Here, we show that a convolutional layer that computes box filter responses in a sliding manner can be used within deep architectures, whereas the dimensions and the offsets of the sliding boxes in such a layer can be learned as part of an end-to-end loss minimization. 
Crucially, the training process can make the size of the boxes in such a layer arbitrarily large without incurring extra computational cost and without the need to increase the number of learnable parameters. Due to its ability to integrate information over large boxes, the new layer facilitates long-range propagation of information and leads to the efficient increase of the receptive fields of downstream units in the network. By incorporating the new layer into existing architectures for semantic segmentation, we are able to achieve both the increase in segmentation accuracy as well as the decrease in the computational cost and the number of learnable parameters.", "bibtex": "@inproceedings{NEURIPS2018_8e489b49,\n author = {Burkov, Egor and Lempitsky, Victor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Neural Networks with Box Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8e489b4966fe8f703b5be647f1cbae63-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8e489b4966fe8f703b5be647f1cbae63-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8e489b4966fe8f703b5be647f1cbae63-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8e489b4966fe8f703b5be647f1cbae63-Reviews.html", "metareview": "", "pdf_size": 420273, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15004510562166029998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Samsung AI Center + Skolkovo Institute of Science and Technology (Skoltech); Samsung AI Center + Skolkovo Institute of Science and Technology (Skoltech)", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8e489b4966fe8f703b5be647f1cbae63-Abstract.html", "aff_unique_index": "0+1;0+1", "aff_unique_norm": "Samsung;Skolkovo Institute of Science and Technology", "aff_unique_dep": "AI Center;", "aff_unique_url": "https://www.samsung.com/global/careers/ai-center/;https://www.skoltech.ru", "aff_unique_abbr": "Samsung AI;Skoltech", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0+1", "aff_country_unique": "South Korea;Russian Federation" }, { "title": "Deep Non-Blind Deconvolution via Generalized Low-Rank Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11055", "id": "11055", "author_site": "Wenqi Ren, Jiawei Zhang, Lin Ma, Jinshan Pan, Xiaochun Cao, Wangmeng Zuo, Wei Liu, Ming-Hsuan Yang", "author": "Wenqi Ren; Jiawei Zhang; Lin Ma; Jinshan Pan; Xiaochun Cao; Wangmeng Zuo; Wei Liu; Ming-Hsuan Yang", "abstract": "In this paper, we present a deep convolutional neural network to capture the inherent properties of image degradation, which can handle different kernels and saturated pixels in a unified framework. The proposed neural network is motivated by the low-rank property of pseudo-inverse kernels. We first compute a generalized low-rank approximation for a large number of blur kernels, and then use separable filters to initialize the convolutional parameters in the network. 
Our analysis shows that the estimated decomposed matrices contain the most essential information of the input kernel, which enables the proposed network to handle various blurs in a unified framework and to generate high-quality deblurring results. Experimental results on benchmark datasets with noise and saturated pixels demonstrate that the proposed algorithm performs favorably against state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_0aa1883c,\n author = {Ren, Wenqi and Zhang, Jiawei and Ma, Lin and Pan, Jinshan and Cao, Xiaochun and Zuo, Wangmeng and Liu, Wei and Yang, Ming-Hsuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Non-Blind Deconvolution via Generalized Low-Rank Approximation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0aa1883c6411f7873cb83dacb17b0afc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0aa1883c6411f7873cb83dacb17b0afc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0aa1883c6411f7873cb83dacb17b0afc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0aa1883c6411f7873cb83dacb17b0afc-Reviews.html", "metareview": "", "pdf_size": 8113682, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7574801222578138743&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "IIE, CAS; SenseTime Research; Tencent AI Lab; NJUST; IIE, CAS; HIT; Tencent AI Lab; UCMerced, Google Cloud", "aff_domain": "iie.ac.cn;sensetime.com;tencent.com;njust.edu.cn;iie.ac.cn;hit.edu.cn;tencent.com;ucmerced.edu", "email": "iie.ac.cn;sensetime.com;tencent.com;njust.edu.cn;iie.ac.cn;hit.edu.cn;tencent.com;ucmerced.edu", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0aa1883c6411f7873cb83dacb17b0afc-Abstract.html", "aff_unique_index": "0;1;2;3;0;4;2;5", "aff_unique_norm": "Institute of Electrical Engineers, Chinese Academy of Sciences;SenseTime;Tencent;Nanjing University of Science and Technology;Harbin Institute of Technology;University of California, Merced", "aff_unique_dep": ";SenseTime Research;Tencent AI Lab;;;", "aff_unique_url": "http://www.iie.cas.cn;https://www.sensetime.com;https://ai.tencent.com;http://www.njust.edu.cn;http://www.hit.edu.cn/;https://www.ucmerced.edu", "aff_unique_abbr": "IIE;SenseTime;Tencent AI Lab;NJUST;HIT;UC Merced", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Harbin;Merced", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Deep Poisson gamma dynamical systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11807", "id": "11807", "author_site": "Dandan Guo, Bo Chen, Hao Zhang, Mingyuan Zhou", "author": "Dandan Guo; Bo Chen; Hao Zhang; Mingyuan Zhou", "abstract": "We develop deep Poisson-gamma dynamical systems (DPGDS) to model sequentially observed multivariate count data, improving previously proposed models by not only mining deep hierarchical latent structure from the data, but also capturing both first-order and long-range temporal dependencies.
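The low-rank motivation above has a compact linear-algebra core: a kernel (or pseudo-inverse kernel) with low rank factors into a few separable 1-D filter pairs, which is what makes separable-convolution initialization possible. A single-kernel SVD sketch; the paper's generalized low-rank approximation is computed jointly over many kernels and is not reproduced here:

```python
import numpy as np

def separable_approximation(kernel, rank):
    """Approximate a 2-D kernel by `rank` separable (rank-1) filter pairs."""
    u, s, vt = np.linalg.svd(kernel, full_matrices=False)
    cols = u[:, :rank] * np.sqrt(s[:rank])            # vertical 1-D filters
    rows = vt[:rank, :] * np.sqrt(s[:rank])[:, None]  # horizontal 1-D filters
    approx = cols @ rows                              # sum of rank-1 outer products
    return cols, rows, approx

kernel = np.outer(np.hanning(15), np.hanning(15))  # toy, exactly separable blur
_, _, k_hat = separable_approximation(kernel, rank=1)
print(np.linalg.norm(kernel - k_hat))              # ~0: one filter pair suffices
```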
Using sophisticated but simple-to-implement data augmentation techniques, we derived closed-form Gibbs sampling update equations by first backward and upward propagating auxiliary latent counts, and then forward and downward sampling latent variables. Moreover, we develop stochastic gradient MCMC inference that is scalable to very long multivariate count time series. Experiments on both synthetic and a variety of real-world data demonstrate that the proposed model not only has excellent predictive performance, but also provides highly interpretable multilayer latent structure to represent hierarchical and temporal information propagation.", "bibtex": "@inproceedings{NEURIPS2018_4ffb0d2b,\n author = {Guo, Dandan and Chen, Bo and Zhang, Hao and Zhou, Mingyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Poisson gamma dynamical systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4ffb0d2ba92f664c2281970110a2e071-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4ffb0d2ba92f664c2281970110a2e071-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4ffb0d2ba92f664c2281970110a2e071-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4ffb0d2ba92f664c2281970110a2e071-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4ffb0d2ba92f664c2281970110a2e071-Reviews.html", "metareview": "", "pdf_size": 1443826, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3704686609681675703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "National Laboratory of Radar Signal Processing+Collaborative Innovation Center of Information Sensing and Understanding+Xidian University, Xi\u2019an, China; National Laboratory of Radar Signal Processing+Collaborative Innovation Center of Information Sensing and Understanding+Xidian University, Xi\u2019an, China; National Laboratory of Radar Signal Processing+Collaborative Innovation Center of Information Sensing and Understanding+Xidian University, Xi\u2019an, China; McCombs School of Business+The University of Texas at Austin, Austin, TX 78712, USA", "aff_domain": "126.com;mail.xidian.edu.cn;163.com;mccombs.utexas.edu", "email": "126.com;mail.xidian.edu.cn;163.com;mccombs.utexas.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4ffb0d2ba92f664c2281970110a2e071-Abstract.html", "aff_unique_index": "0+1+2;0+1+2;0+1+2;3+3", "aff_unique_norm": "National Laboratory of Radar Signal Processing;Collaborative Innovation Center;Xidian University;University of Texas at Austin", "aff_unique_dep": ";Information Sensing and Understanding;;McCombs School of Business", "aff_unique_url": ";;http://www.xidian.edu.cn;https://mccombs.utexas.edu", "aff_unique_abbr": ";;Xidian;UT Austin", "aff_campus_unique_index": "1;1;1;2+2", "aff_campus_unique": ";Xi'an;Austin", "aff_country_unique_index": "0+0;0+0;0+0;2+2", "aff_country_unique": "China;;United States" }, { "title": "Deep Predictive Coding Network with Local Recurrent Processing for Object Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11877", "id": "11877", "author_site": "Kuan Han, Haiguang Wen, Yizhen Zhang, Di Fu, Eugenio Culurciello, 
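For intuition about the generative process described above, here is a forward simulation of a one-layer Poisson-gamma dynamical system (gamma latent states with a stochastic transition, Poisson-distributed counts). The deep hierarchy, the Gibbs sampler, and the SG-MCMC inference are not sketched, and all sizes and hyperparameters below are made up:

```python
import numpy as np

rng = np.random.default_rng(0)
K, V, T, tau = 4, 10, 50, 1.0              # latent factors, features, time steps
Pi = rng.dirichlet(np.ones(K), size=K).T   # column-stochastic transition matrix
Phi = rng.dirichlet(np.ones(V), size=K).T  # V x K factor loadings

theta = np.ones(K)
counts = []
for t in range(T):
    # Gamma latent state whose shape is coupled to the previous state.
    theta = rng.gamma(shape=tau * (Pi @ theta), scale=1.0 / tau)
    # Poisson observations driven by the latent state.
    counts.append(rng.poisson(Phi @ theta))
counts = np.asarray(counts)                # T x V multivariate count series
```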
Zhongming Liu", "author": "Kuan Han; Haiguang Wen; Yizhen Zhang; Di Fu; Eugenio Culurciello; Zhongming Liu", "abstract": "Inspired by \"predictive coding\" - a theory in neuroscience, we develop a bi-directional and dynamic neural network with local recurrent processing, namely predictive coding network (PCN). Unlike feedforward-only convolutional neural networks, PCN includes both feedback connections, which carry top-down predictions, and feedforward connections, which carry bottom-up errors of prediction. Feedback and feedforward connections enable adjacent layers to interact locally and recurrently to refine representations towards minimization of layer-wise prediction errors. When unfolded over time, the recurrent processing gives rise to an increasingly deeper hierarchy of non-linear transformation, allowing a shallow network to dynamically extend itself into an arbitrarily deep network. We train and test PCN for image classification with SVHN, CIFAR and ImageNet datasets. Despite notably fewer layers and parameters, PCN achieves competitive performance compared to classical and state-of-the-art models. Further analysis shows that the internal representations in PCN converge over time and yield increasingly better accuracy in object recognition. Errors of top-down prediction also reveal visual saliency or bottom-up attention.", "bibtex": "@inproceedings{NEURIPS2018_1c63926e,\n author = {Han, Kuan and Wen, Haiguang and Zhang, Yizhen and Fu, Di and Culurciello, Eugenio and Liu, Zhongming},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Predictive Coding Network with Local Recurrent Processing for Object Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1c63926ebcabda26b5cdb31b5cc91efb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1c63926ebcabda26b5cdb31b5cc91efb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1c63926ebcabda26b5cdb31b5cc91efb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1c63926ebcabda26b5cdb31b5cc91efb-Reviews.html", "metareview": "", "pdf_size": 5257535, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=982895399250040522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Electrical and Computer Engineering, Purdue University + Purdue Institute for Integrative Neuroscience, Purdue University; School of Electrical and Computer Engineering, Purdue University + Purdue Institute for Integrative Neuroscience, Purdue University; School of Electrical and Computer Engineering, Purdue University + Purdue Institute for Integrative Neuroscience, Purdue University; School of Electrical and Computer Engineering, Purdue University + Purdue Institute for Integrative Neuroscience, Purdue University; School of Electrical and Computer Engineering, Purdue University + Weldon School of Biomedical Engineering, Purdue University; School of Electrical and Computer Engineering, Purdue University + Weldon School of Biomedical Engineering, Purdue University + Purdue Institute for Integrative Neuroscience, Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu;purdue.edu", 
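The "refine representations towards minimization of layer-wise prediction errors" dynamic in the PCN abstract can be caricatured by the classical linear predictive-coding relaxation: top-down weights predict the layer below, and the feedforward error nudges the representation. A one-layer toy, not the trained convolutional model:

```python
import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(64, 16)) / np.sqrt(64)  # top-down (feedback) weights
x = rng.normal(size=64)                      # bottom-up input
r = np.zeros(16)                             # layer representation

for step in range(100):
    err = x - W @ r            # feedforward pathway carries prediction error
    r = r + 0.1 * (W.T @ err)  # representation refined to reduce the error
print(np.linalg.norm(x - W @ r))  # residual shrinks as the recurrence unfolds
```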
"github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1c63926ebcabda26b5cdb31b5cc91efb-Abstract.html", "aff_unique_index": "0+0;0+0;0+0;0+0;0+0;0+0+0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "School of Electrical and Computer Engineering", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": ";;;;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0;0+0;0+0;0+0+0", "aff_country_unique": "United States" }, { "title": "Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11467", "id": "11467", "author_site": "Kurtland Chua, Roberto Calandra, Rowan McAllister, Sergey Levine", "author": "Kurtland Chua; Roberto Calandra; Rowan McAllister; Sergey Levine", "abstract": "Model-based reinforcement learning (RL) algorithms can attain excellent sample efficiency, but often lag behind the best model-free algorithms in terms of asymptotic performance. This is especially true with high-capacity parametric function approximators, such as deep networks. In this paper, we study how to bridge this gap, by employing uncertainty-aware dynamics models. We propose a new algorithm called probabilistic ensembles with trajectory sampling (PETS) that combines uncertainty-aware deep network dynamics models with sampling-based uncertainty propagation. Our comparison to state-of-the-art model-based and model-free deep RL algorithms shows that our approach matches the asymptotic performance of model-free algorithms on several challenging benchmark tasks, while requiring significantly fewer samples (e.g. 8 and 125 times fewer samples than Soft Actor Critic and Proximal Policy Optimization respectively on the half-cheetah task).", "bibtex": "@inproceedings{NEURIPS2018_3de568f8,\n author = {Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3de568f8597b94bda53149c7d7f5958c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3de568f8597b94bda53149c7d7f5958c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3de568f8597b94bda53149c7d7f5958c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3de568f8597b94bda53149c7d7f5958c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3de568f8597b94bda53149c7d7f5958c-Reviews.html", "metareview": "", "pdf_size": 1730423, "gs_citation": 1719, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6248399848380977147&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3de568f8597b94bda53149c7d7f5958c-Abstract.html" }, { "title": "Deep Reinforcement Learning of Marked Temporal Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11321", "id": "11321", "author_site": "Utkarsh Upadhyay, Abir De, Manuel Gomez Rodriguez", "author": "Utkarsh Upadhyay; Abir De; Manuel Gomez Rodriguez", "abstract": "In a wide variety of applications, humans interact with a complex environment by means of asynchronous stochastic discrete events in continuous time. Can we design online interventions that will help humans achieve certain goals in such an asynchronous setting? In this paper, we address the above problem from the perspective of deep reinforcement learning of marked temporal point processes, where both the actions taken by an agent and the feedback it receives from the environment are asynchronous stochastic discrete events characterized using marked temporal point processes. In doing so, we define the agent's policy using the intensity and mark distribution of the corresponding process and then derive a flexible policy gradient method, which embeds the agent's actions and the feedback it receives into real-valued vectors using deep recurrent neural networks. Our method does not make any assumptions on the functional form of the intensity and mark distribution of the feedback and it allows for arbitrarily complex reward functions. We apply our methodology to two different applications in viral marketing and personalized teaching and, using data gathered from Twitter and Duolingo, we show that it may be able to find interventions to help marketers and learners achieve their goals more effectively than alternatives.", "bibtex": "@inproceedings{NEURIPS2018_71a58e8c,\n author = {Upadhyay, Utkarsh and De, Abir and Gomez Rodriguez, Manuel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
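Because both the agent's actions and the environment's feedback above are events of a marked temporal point process, the basic computational primitive is sampling the next event time from an intensity function. A standard Ogata-thinning sketch; in the paper the intensity comes from a recurrent network, whereas the decaying intensity below is just an example:

```python
import numpy as np

rng = np.random.default_rng(0)

def sample_next_event(intensity, t0, lambda_max, t_end=np.inf):
    """Ogata thinning: sample the next event time of a point process
    whose intensity is bounded above by lambda_max."""
    t = t0
    while t < t_end:
        t += rng.exponential(1.0 / lambda_max)        # homogeneous candidate
        if rng.uniform() < intensity(t) / lambda_max:  # accept w.p. lambda(t)/lambda_max
            return t
    return None

# Example: an exponentially decaying (excitation-like) intensity.
next_t = sample_next_event(lambda t: 0.5 + 2.0 * np.exp(-t), t0=0.0, lambda_max=2.5)
```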
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Reinforcement Learning of Marked Temporal Point Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/71a58e8cb75904f24cde464161c3e766-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/71a58e8cb75904f24cde464161c3e766-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/71a58e8cb75904f24cde464161c3e766-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/71a58e8cb75904f24cde464161c3e766-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/71a58e8cb75904f24cde464161c3e766-Reviews.html", "metareview": "", "pdf_size": 8326988, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10991436220054749409&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "MPI-SWS; MPI-SWS; MPI-SWS", "aff_domain": "mpi-sws.org;mpi-sws.org;mpi-sws.org", "email": "mpi-sws.org;mpi-sws.org;mpi-sws.org", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/71a58e8cb75904f24cde464161c3e766-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Max Planck Institute for Software Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-sws.org", "aff_unique_abbr": "MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Deep State Space Models for Time Series Forecasting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11747", "id": "11747", "author_site": "Syama Sundar Rangapuram, Matthias W Seeger, Jan Gasthaus, Lorenzo Stella, Bernie Wang, Tim Januschowski", "author": "Syama Sundar Rangapuram; Matthias W Seeger; Jan Gasthaus; Lorenzo Stella; Yuyang Wang; Tim Januschowski", "abstract": "We present a novel approach to probabilistic time series forecasting that combines state space models with deep learning. By parametrizing a per-time-series linear state space model with a jointly-learned recurrent neural network, our method retains desired properties of state space models such as data efficiency and interpretability, while making use of the ability to learn complex patterns from raw data offered by deep learning approaches. Our method scales gracefully from regimes where little training data is available to regimes where data from millions of time series can be leveraged to learn accurate models. We provide qualitative as well as quantitative results with the proposed method, showing that it compares favorably to the state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_5cf68969,\n author = {Rangapuram, Syama Sundar and Seeger, Matthias W and Gasthaus, Jan and Stella, Lorenzo and Wang, Yuyang and Januschowski, Tim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
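In the deep state space setup above, the recurrent network only emits the parameters of a per-series linear-Gaussian state space model; prediction itself is ordinary Kalman filtering. A self-contained filter step on a local-level model, with placeholder values where the network's outputs would go:

```python
import numpy as np

def kalman_step(m, P, y, F, Q, H, R):
    """One predict/update step of a linear-Gaussian state space model.
    In models of this kind, F, Q, H, R would be emitted per series (and
    per step) by a recurrent network; the filtering itself is standard."""
    m_pred = F @ m                      # predict
    P_pred = F @ P @ F.T + Q
    S = H @ P_pred @ H.T + R            # update with observation y
    K = P_pred @ H.T @ np.linalg.inv(S)
    m_new = m_pred + K @ (y - H @ m_pred)
    P_new = (np.eye(len(m)) - K @ H) @ P_pred
    return m_new, P_new

# Local-level model: a latent random walk observed with noise.
m, P = np.zeros(1), np.eye(1)
F = np.eye(1); H = np.eye(1); Q = 0.1 * np.eye(1); R = 0.5 * np.eye(1)
for y in [1.0, 1.2, 0.9, 1.1]:
    m, P = kalman_step(m, P, np.array([y]), F, Q, H, R)
```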
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep State Space Models for Time Series Forecasting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5cf68969fb67aa6082363a6d4e6468e2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5cf68969fb67aa6082363a6d4e6468e2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5cf68969fb67aa6082363a6d4e6468e2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5cf68969fb67aa6082363a6d4e6468e2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5cf68969fb67aa6082363a6d4e6468e2-Reviews.html", "metareview": "", "pdf_size": 335592, "gs_citation": 980, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6262538664494392230&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Amazon Research; Amazon Research; Amazon Research; Amazon Research; Amazon Research; Amazon Research", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "email": "amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5cf68969fb67aa6082363a6d4e6468e2-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon Research", "aff_unique_url": "https://www.amazon.science", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Deep State Space Models for Unconditional Word Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11597", "id": "11597", "author_site": "Florian Schmidt, Thomas Hofmann", "author": "Florian Schmidt; Thomas Hofmann", "abstract": "Autoregressive feedback is considered a necessity for successful unconditional text generation using stochastic sequence models. However, such feedback is known to introduce systematic biases into the training process and it obscures a principle of generation: committing to global information and forgetting local nuances. We show that a non-autoregressive deep state space model with a clear separation of global and local uncertainty can be built from only two ingredients: An independent noise source and a deterministic transition function. Recent advances on flow-based variational inference can be used to train an evidence lower-bound without resorting to annealing, auxiliary losses or similar measures. The result is a highly interpretable generative model on par with comparable auto-regressive models on the task of word generation.", "bibtex": "@inproceedings{NEURIPS2018_0cd60efb,\n author = {Schmidt, Florian and Hofmann, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
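The "two ingredients" claim above, an independent noise source plus a deterministic transition function with no output feedback, is easy to make concrete. A toy non-autoregressive generator in that shape (weights random and untrained; this conveys only the structure, not the trained model):

```python
import numpy as np

rng = np.random.default_rng(0)
T, dz, vocab = 8, 16, 27
A = rng.normal(size=(dz, dz)) / np.sqrt(dz)   # deterministic transition weights
B = rng.normal(size=(dz, dz)) / np.sqrt(dz)   # how local noise enters the state
E = rng.normal(size=(vocab, dz))              # character emission weights

z = rng.normal(size=dz)          # global latent: committed once per word
symbols = []
for t in range(T):
    eps = rng.normal(size=dz)    # independent local noise source
    z = np.tanh(A @ z + B @ eps) # state evolves without output feedback
    symbols.append(int(np.argmax(E @ z)))
```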
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep State Space Models for Unconditional Word Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0cd60efb5578cd967c3c23894f305800-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0cd60efb5578cd967c3c23894f305800-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0cd60efb5578cd967c3c23894f305800-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0cd60efb5578cd967c3c23894f305800-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0cd60efb5578cd967c3c23894f305800-Reviews.html", "metareview": "", "pdf_size": 379417, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18192085165542561802&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, ETH Z\u00fcrich; Department of Computer Science, ETH Z\u00fcrich", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0cd60efb5578cd967c3c23894f305800-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Deep Structured Prediction with Nonlinear Output Transformations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11612", "id": "11612", "author_site": "Colin Graber, Ofer Meshi, Alex Schwing", "author": "Colin Graber; Ofer Meshi; Alexander Schwing", "abstract": "Deep structured models are widely used for tasks like semantic segmentation, where explicit correlations between variables provide important prior information which generally helps to reduce the data needs of deep nets. However, current deep structured models are restricted by oftentimes very local neighborhood structure, which cannot be increased for computational complexity reasons, and by the fact that the output configuration, or a representation thereof, cannot be transformed further. Very recent approaches which address those issues include graphical model inference inside deep nets so as to permit subsequent non-linear output space transformations. However, optimization of those formulations is challenging and not well understood. Here, we develop a novel model which generalizes existing approaches, such as structured prediction energy networks, and discuss a formulation which maintains applicability of existing inference techniques.", "bibtex": "@inproceedings{NEURIPS2018_a2d10d35,\n author = {Graber, Colin and Meshi, Ofer and Schwing, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Structured Prediction with Nonlinear Output Transformations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a2d10d355cdebc879e4fc6ecc6f63dd7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a2d10d355cdebc879e4fc6ecc6f63dd7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a2d10d355cdebc879e4fc6ecc6f63dd7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a2d10d355cdebc879e4fc6ecc6f63dd7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a2d10d355cdebc879e4fc6ecc6f63dd7-Reviews.html", "metareview": "", "pdf_size": 1300028, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14558697357825196777&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Illinois at Urbana-Champaign; Google; University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;google.com;illinois.edu", "email": "illinois.edu;google.com;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a2d10d355cdebc879e4fc6ecc6f63dd7-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://illinois.edu;https://www.google.com", "aff_unique_abbr": "UIUC;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Urbana-Champaign;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Deep, complex, invertible networks for inversion of transmission effects in multimode optical fibres", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11331", "id": "11331", "author_site": "Ois\u00edn Moran, Piergiorgio Caramazza, Daniele Faccio, Roderick Murray-Smith", "author": "Ois\u00edn Moran; Piergiorgio Caramazza; Daniele Faccio; Roderick Murray-Smith", "abstract": "We use complex-weighted, deep networks to invert the effects of multimode optical fibre distortion of a coherent input image. We generated experimental data based on collections of optical fibre responses to greyscale input images generated with coherent light, by measuring only image amplitude (not amplitude and phase as is typical) at the output of \\SI{1}{\\metre} and \\SI{10}{\\metre} long, \\SI{105}{\\micro\\metre} diameter multimode fibre. This data is made available as the {\\it Optical fibre inverse problem} Benchmark collection. The experimental data is used to train complex-weighted models with a range of regularisation approaches. A {\\it unitary regularisation} approach for complex-weighted networks is proposed which performs well in robustly inverting the fibre transmission matrix, which fits well with the physical theory. A key benefit of the unitary constraint is that it allows us to learn a forward unitary model and analytically invert it to solve the inverse problem. We demonstrate this approach, and show how it can improve performance by incorporating knowledge of the phase shift induced by the spatial light modulator.", "bibtex": "@inproceedings{NEURIPS2018_14851003,\n author = {Moran, Ois\\'{\\i}n and Caramazza, Piergiorgio and Faccio, Daniele and Murray-Smith, Roderick},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
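The unitary-regularisation idea above has a compact core: penalise ||W^H W - I||_F^2 during training, and once the learned forward (transmission) operator is unitary, the inverse problem is solved analytically by the conjugate transpose. A NumPy sketch of both halves, with no training loop or fibre data:

```python
import numpy as np

rng = np.random.default_rng(0)
n = 32
W = (rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))) / np.sqrt(2 * n)

def unitarity_penalty(W):
    """Regulariser pushing a complex weight matrix towards unitarity."""
    G = W.conj().T @ W
    return np.linalg.norm(G - np.eye(W.shape[0])) ** 2

U, _, Vh = np.linalg.svd(W)
W_unitary = U @ Vh                       # nearest unitary matrix to W
print(unitarity_penalty(W), unitarity_penalty(W_unitary))  # second is ~0

# Why unitarity helps: the forward model inverts in closed form.
x = rng.normal(size=n) + 1j * rng.normal(size=n)
y = W_unitary @ x                        # forward transmission
x_rec = W_unitary.conj().T @ y           # inverse is the conjugate transpose
assert np.allclose(x, x_rec)
```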
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep, complex, invertible networks for inversion of transmission effects in multimode optical fibres},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/148510031349642de5ca0c544f31b2ef-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/148510031349642de5ca0c544f31b2ef-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/148510031349642de5ca0c544f31b2ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/148510031349642de5ca0c544f31b2ef-Reviews.html", "metareview": "", "pdf_size": 1226751, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6570512017948887315&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "School of Computing Science, University of Glasgow, Scotland; School of Physics and Astronomy, University of Glasgow, Scotland; School of Physics and Astronomy, University of Glasgow, Scotland; School of Computing Science, University of Glasgow, Scotland", "aff_domain": "inscribe.ai;gmail.com;glasgow.ac.uk;glasgow.ac.uk", "email": "inscribe.ai;gmail.com;glasgow.ac.uk;glasgow.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/148510031349642de5ca0c544f31b2ef-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "School of Computing Science", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "UofG", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Glasgow", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "DeepExposure: Learning to Expose Photos with Asynchronously Reinforced Adversarial Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11226", "id": "11226", "author_site": "Runsheng Yu, Wenyu Liu, Yasen Zhang, Zhi Qu, Deli Zhao, Bo Zhang", "author": "Runsheng Yu; Wenyu Liu; Yasen Zhang; Zhi Qu; Deli Zhao; Bo Zhang", "abstract": "Accurate exposure is the key to capturing high-quality photos in computational photography, especially for mobile phones that are limited by sizes of camera modules. Inspired by luminosity masks usually applied by professional photographers, in this paper, we develop a novel algorithm for learning local exposures with deep reinforcement adversarial learning. To be specific, we segment an image into sub-images that can reflect variations of dynamic range exposures according to raw low-level features. Based on these sub-images, a local exposure for each sub-image is automatically learned by virtue of a policy network sequentially while the reward of learning is globally designed for striking a balance of overall exposures. The aesthetic evaluation function is approximated by the discriminator in generative adversarial networks. The reinforcement learning and the adversarial learning are trained collaboratively by asynchronous deterministic policy gradient and generative loss approximation. To further simplify the algorithmic architecture, we also prove the feasibility of leveraging the discriminator as the value function. Furthermore, we employ each local exposure to retouch the raw input image respectively, thus delivering multiple retouched images under different exposures which are fused with exposure blending.
The extensive experiments verify that our algorithms are superior to state-of-the-art methods in terms of quantitative accuracy and visual illustration.", "bibtex": "@inproceedings{NEURIPS2018_a5e0ff62,\n author = {Yu, Runsheng and Liu, Wenyu and Zhang, Yasen and Qu, Zhi and Zhao, Deli and Zhang, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DeepExposure: Learning to Expose Photos with Asynchronously Reinforced Adversarial Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a5e0ff62be0b08456fc7f1e88812af3d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a5e0ff62be0b08456fc7f1e88812af3d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a5e0ff62be0b08456fc7f1e88812af3d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a5e0ff62be0b08456fc7f1e88812af3d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a5e0ff62be0b08456fc7f1e88812af3d-Reviews.html", "metareview": "", "pdf_size": 1668263, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17666463009516250394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Xiaomi AI Lab+South China Normal University; Xiaomi AI Lab+Peking University; Xiaomi AI Lab; Xiaomi AI Lab; Xiaomi AI Lab; Xiaomi AI Lab", "aff_domain": "gmail.com;pku.edu.cn;xiaomi.com;xiaomi.com;xiaomi.com;xiaomi.com", "email": "gmail.com;pku.edu.cn;xiaomi.com;xiaomi.com;xiaomi.com;xiaomi.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a5e0ff62be0b08456fc7f1e88812af3d-Abstract.html", "aff_unique_index": "0+1;0+2;0;0;0;0", "aff_unique_norm": "Xiaomi Corporation;South China Normal University;Peking University", "aff_unique_dep": "Xiaomi AI Lab;;", "aff_unique_url": "https://www.xiaomi.com;http://www.scnu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "Xiaomi;SCNU;Peking U", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0;0;0;0", "aff_country_unique": "China" }, { "title": "DeepPINK: reproducible feature selection in deep neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11829", "id": "11829", "author_site": "Yang Lu, Yingying Fan, Jinchi Lv, William Stafford Noble", "author": "Yang Lu; Yingying Fan; Jinchi Lv; William Stafford Noble", "abstract": "Deep learning has become increasingly popular in both supervised and unsupervised machine learning thanks to its outstanding empirical performance. However, because of their intrinsic complexity, most deep learning methods are largely treated as black box tools with little interpretability. Even though recent attempts have been made to facilitate the interpretability of deep neural networks (DNNs), existing methods are susceptible to noise and lack of robustness. Therefore, scientists are justifiably cautious about the reproducibility of the discoveries, which is often related to the interpretability of the underlying statistical models. In this paper, we describe a method to increase the interpretability and reproducibility of DNNs by incorporating the idea of feature selection with controlled error rate. 
By designing a new DNN architecture and integrating it with the recently proposed knockoffs framework, we perform feature selection with a controlled error rate, while maintaining high power. This new method, DeepPINK (Deep feature selection using Paired-Input Nonlinear Knockoffs), is applied to both simulated and real data sets to demonstrate its empirical utility.", "bibtex": "@inproceedings{NEURIPS2018_29daf944,\n author = {Lu, Yang and Fan, Yingying and Lv, Jinchi and Stafford Noble, William},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DeepPINK: reproducible feature selection in deep neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/29daf9442f3c0b60642b14c081b4a556-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/29daf9442f3c0b60642b14c081b4a556-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/29daf9442f3c0b60642b14c081b4a556-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/29daf9442f3c0b60642b14c081b4a556-Reviews.html", "metareview": "", "pdf_size": 1177441, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13503321845890894827&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Genome Sciences, University of Washington; Data Sciences and Operations Department, Marshall School of Business, University of Southern California; Data Sciences and Operations Department, Marshall School of Business, University of Southern California; Department of Genome Sciences and Department of Computer Science and Engineering, University of Washington", "aff_domain": "uw.edu;marshall.usc.edu;marshall.usc.edu;uw.edu", "email": "uw.edu;marshall.usc.edu;marshall.usc.edu;uw.edu", "github": "github.com/younglululu/DeepPINK", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/29daf9442f3c0b60642b14c081b4a556-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Washington;University of Southern California", "aff_unique_dep": "Department of Genome Sciences;Data Sciences and Operations Department", "aff_unique_url": "https://www.washington.edu;https://www.usc.edu", "aff_unique_abbr": "UW;USC", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Seattle;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "DeepProbLog: Neural Probabilistic Logic Programming", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11374", "id": "11374", "author_site": "Robin Manhaeve, Sebastijan Dumancic, Angelika Kimmig, Thomas Demeester, Luc De Raedt", "author": "Robin Manhaeve; Sebastijan Dumancic; Angelika Kimmig; Thomas Demeester; Luc De Raedt", "abstract": "We introduce DeepProbLog, a probabilistic logic programming language that incorporates deep learning by means of neural predicates. We show how existing inference and learning techniques can be adapted for the new language. Our experiments demonstrate that DeepProbLog supports (i) both symbolic and subsymbolic representations and inference, (ii) program induction, (iii) probabilistic (logic) programming, and (iv) (deep) learning from examples. 
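DeepPINK's error control comes from the knockoffs machinery: each feature gets a statistic W_j (real-feature importance minus knockoff importance, both read off the paired-input network), and features are selected above the knockoffs+ threshold, which controls the FDR at level q. A sketch with simulated statistics standing in for network outputs:

```python
import numpy as np

def knockoff_threshold(W, q=0.1):
    """Knockoffs+ threshold over feature statistics W[j] (importance of
    feature j minus importance of its knockoff copy). Selecting features
    with W[j] >= T controls the false discovery rate at level q."""
    ts = np.sort(np.abs(W[W != 0]))
    for t in ts:
        fdp_hat = (1 + np.sum(W <= -t)) / max(1, np.sum(W >= t))
        if fdp_hat <= q:
            return t
    return np.inf

rng = np.random.default_rng(0)
# 20 true signals with positive statistics, 200 symmetric nulls.
W = np.concatenate([rng.normal(3, 1, 20), rng.normal(0, 1, 200)])
selected = np.where(W >= knockoff_threshold(W, q=0.2))[0]
```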
To the best of our knowledge, this work is the first to propose a framework where general-purpose neural networks and expressive probabilistic-logical modeling and reasoning are integrated in a way that exploits the full expressiveness and strengths of both worlds and can be trained end-to-end based on examples.", "bibtex": "@inproceedings{NEURIPS2018_dc5d637e,\n author = {Manhaeve, Robin and Dumancic, Sebastijan and Kimmig, Angelika and Demeester, Thomas and De Raedt, Luc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DeepProbLog: Neural Probabilistic Logic Programming},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dc5d637ed5e62c36ecb73b654b05ba2a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dc5d637ed5e62c36ecb73b654b05ba2a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dc5d637ed5e62c36ecb73b654b05ba2a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dc5d637ed5e62c36ecb73b654b05ba2a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dc5d637ed5e62c36ecb73b654b05ba2a-Reviews.html", "metareview": "", "pdf_size": 511515, "gs_citation": 747, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6079567413300944995&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "aff": "KU Leuven; KU Leuven; Cardiff University; Ghent University - imec; KU Leuven", "aff_domain": "cs.kuleuven.be;cs.kuleuven.be;cardiff.ac.uk;ugent.be;cs.kuleuven.be", "email": "cs.kuleuven.be;cs.kuleuven.be;cardiff.ac.uk;ugent.be;cs.kuleuven.be", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dc5d637ed5e62c36ecb73b654b05ba2a-Abstract.html", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Katholieke Universiteit Leuven;Cardiff University;Ghent University", "aff_unique_dep": ";;imec", "aff_unique_url": "https://www.kuleuven.be;https://www.cardiff.ac.uk;https://www.ugent.be/en", "aff_unique_abbr": "KU Leuven;Cardiff;UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Belgium;United Kingdom" }, { "title": "Deepcode: Feedback Codes via Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11898", "id": "11898", "author_site": "Hyeji Kim, Yihan Jiang, Sreeram Kannan, Sewoong Oh, Pramod Viswanath", "author": "Hyeji Kim; Yihan Jiang; Sreeram Kannan; Sewoong Oh; Pramod Viswanath", "abstract": "The design of codes for communicating reliably over a statistically well defined channel is an important endeavor involving deep mathematical research and wide- ranging practical applications. In this work, we present the first family of codes obtained via deep learning, which significantly beats state-of-the-art codes designed over several decades of research. 
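The neural-predicate mechanism can be illustrated with the digit-addition example that is standard in this line of work: networks output distributions over sub-symbolic facts, and the probability of a logical query marginalises over all proofs while staying differentiable in those outputs. A stand-in sketch with random scores in place of trained classifiers:

```python
import numpy as np

rng = np.random.default_rng(0)

def neural_digit(image):
    """Stand-in for a neural predicate: a distribution over the digit in
    an image. Here the 'network' is random and ignores its input."""
    logits = rng.normal(size=10)
    e = np.exp(logits - logits.max())
    return e / e.sum()

p1, p2 = neural_digit("img_a"), neural_digit("img_b")

# Logic side: addition(img_a, img_b, S) holds if digit(a) + digit(b) = S.
# Its probability sums over all proofs and remains differentiable in the
# neural-predicate outputs -- the coupling DeepProbLog exploits.
p_sum = np.zeros(19)
for d1 in range(10):
    for d2 in range(10):
        p_sum[d1 + d2] += p1[d1] * p2[d2]
```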
The communication channel under consideration is the Gaussian noise channel with feedback, whose study was initiated by Shannon; feedback is known theoretically to improve reliability of communication, but no practical codes that do so have ever been successfully constructed.", "bibtex": "@inproceedings{NEURIPS2018_31f81674,\n author = {Kim, Hyeji and Jiang, Yihan and Kannan, Sreeram and Oh, Sewoong and Viswanath, Pramod},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deepcode: Feedback Codes via Deep Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/31f81674a348511b990af268ca3a8391-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/31f81674a348511b990af268ca3a8391-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/31f81674a348511b990af268ca3a8391-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/31f81674a348511b990af268ca3a8391-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/31f81674a348511b990af268ca3a8391-Reviews.html", "metareview": "", "pdf_size": 940400, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17328761776643473390&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Samsung AI Centre Cambridge*; University of Washington\u2020; University of Washington\u2020; University of Illinois at Urbana Champaign\u2021; University of Illinois at Urbana Champaign\u2021", "aff_domain": "gmail.com;gmail.com;uw.edu;illinois.edu;illinois.edu", "email": "gmail.com;gmail.com;uw.edu;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/31f81674a348511b990af268ca3a8391-Abstract.html", "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "Samsung;University of Washington;University of Illinois Urbana-Champaign", "aff_unique_dep": "AI Centre;;", "aff_unique_url": "https://www.samsung.com;https://www.washington.edu;https://illinois.edu", "aff_unique_abbr": "Samsung AI;UW;UIUC", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Cambridge;;Urbana-Champaign", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Delta-encoder: an effective sample synthesis method for few-shot object recognition", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11291", "id": "11291", "author_site": "Eli Schwartz, Leonid Karlinsky, Joseph Shtok, Sivan Harary, Mattias Marder, Abhishek Kumar, Rogerio Feris, Raja Giryes, Alex Bronstein", "author": "Eli Schwartz; Leonid Karlinsky; Joseph Shtok; Sivan Harary; Mattias Marder; Abhishek Kumar; Rogerio Feris; Raja Giryes; Alex Bronstein", "abstract": "Learning to classify new categories based on just one or a few examples is a long-standing challenge in modern computer vision. In this work, we propose a simple yet effective method for few-shot (and one-shot) object recognition. Our approach is based on a modified auto-encoder, denoted delta-encoder, that learns to synthesize new samples for an unseen category just by seeing few examples from it. The synthesized samples are then used to train a classifier. 
The proposed approach learns to both extract transferable intra-class deformations, or \"deltas\", between same-class pairs of training examples, and to apply those deltas to the few provided examples of a novel class (unseen during training) in order to efficiently synthesize samples from that new class. The proposed method improves the state-of-the-art of one-shot object-recognition and performs comparably in the few-shot case.", "bibtex": "@inproceedings{NEURIPS2018_1714726c,\n author = {Schwartz, Eli and Karlinsky, Leonid and Shtok, Joseph and Harary, Sivan and Marder, Mattias and Kumar, Abhishek and Feris, Rogerio and Giryes, Raja and Bronstein, Alex},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Delta-encoder: an effective sample synthesis method for few-shot object recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1714726c817af50457d810aae9d27a2e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1714726c817af50457d810aae9d27a2e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1714726c817af50457d810aae9d27a2e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1714726c817af50457d810aae9d27a2e-Reviews.html", "metareview": "", "pdf_size": 2786064, "gs_citation": 482, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13986746272492724236&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "IBM Research AI+School of Electrical Engineering, Tel-Aviv University, Tel-Aviv, Israel; IBM Research AI+School of Electrical Engineering, Tel-Aviv University, Tel-Aviv, Israel; IBM Research AI; IBM Research AI; IBM Research AI; IBM Research AI; IBM Research AI; School of Electrical Engineering, Tel-Aviv University, Tel-Aviv, Israel; Department of Computer Science, Technion, Haifa, Israel", "aff_domain": "il.ibm.com;il.ibm.com; ; ; ; ; ; ; ", "email": "il.ibm.com;il.ibm.com; ; ; ; ; ; ; ", "github": "", "project": "", "author_num": 9, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1714726c817af50457d810aae9d27a2e-Abstract.html", "aff_unique_index": "0+1;0+1;0;0;0;0;0;1;2", "aff_unique_norm": "IBM;Tel-Aviv University;Technion", "aff_unique_dep": "AI;School of Electrical Engineering;Department of Computer Science", "aff_unique_url": "https://www.ibm.com/research;https://www.tau.ac.il;https://www.technion.ac.il", "aff_unique_abbr": "IBM;TAU;Technion", "aff_campus_unique_index": "1;1;1;2", "aff_campus_unique": ";Tel-Aviv;Haifa", "aff_country_unique_index": "0+1;0+1;0;0;0;0;0;1;1", "aff_country_unique": "United States;Israel" }, { "title": "Demystifying excessively volatile human learning: A Bayesian persistent prior and a neural approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11285", "id": "11285", "author_site": "Chaitanya Ryali, Gautam Reddy, Angela Yu", "author": "Chaitanya Ryali; Gautam Reddy; Angela J. Yu", "abstract": "Understanding how humans and animals learn about statistical regularities in stable and volatile environments, and utilize these regularities to make predictions and decisions, is an important problem in neuroscience and psychology. 
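The delta-encoder's data flow can be caricatured in feature space: harvest intra-class "deltas" from same-class pairs of seen classes, then replay them on the single example of a novel class to synthesize training samples. Plain vector offsets below replace the learned non-linear encoder/decoder, so this conveys only the structure:

```python
import numpy as np

rng = np.random.default_rng(0)
dim = 16

# Same-class pairs from *seen* classes (feature vectors, not pixels).
seen_a = rng.normal(size=(100, dim))
seen_b = seen_a + rng.normal(scale=0.3, size=(100, dim))  # intra-class variation

deltas = seen_b - seen_a        # encoder stand-in: extract the deformations
anchor = rng.normal(size=dim)   # the one example of a novel class

# Decoder stand-in: replay seen-class deformations on the novel anchor,
# yielding synthetic novel-class samples to train a classifier on.
synthetic = anchor + deltas
```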
Using a Bayesian modeling framework, specifically the Dynamic Belief Model (DBM), it has previously been shown that humans tend to make the {\\it default} assumption that environmental statistics undergo abrupt, unsignaled changes, even when environmental statistics are actually stable. Because exact Bayesian inference in this setting, an example of switching state space models, is computationally intense, a number of approximately Bayesian and heuristic algorithms have been proposed to account for learning/prediction in the brain. Here, we examine a neurally plausible algorithm, a special case of leaky integration dynamics we denote as EXP (for exponential filtering), that is significantly simpler than all previously suggested algorithms except for the delta-learning rule, and which far outperforms the delta rule in approximating Bayesian prediction performance. We derive the theoretical relationship between DBM and EXP, and show that EXP gains computational efficiency by foregoing the representation of inferential uncertainty (as does the delta rule), but that it nevertheless achieves near-Bayesian performance due to its ability to incorporate a \"persistent prior\" influence unique to DBM and absent from the other algorithms. Furthermore, we show that EXP is comparable to DBM but better than all other models in reproducing human behavior in a visual search task, suggesting that human learning and prediction also incorporates an element of persistent prior. More broadly, our work demonstrates that when observations are information-poor, detecting changes or modulating the learning rate is both {\\it difficult} and (thus) {\\it unnecessary} for making Bayes-optimal predictions.", "bibtex": "@inproceedings{NEURIPS2018_78b9cab1,\n author = {Ryali, Chaitanya and Reddy, Gautam and Yu, Angela J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
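The EXP algorithm analysed above is leaky integration of the outcome stream plus a persistent-prior contribution, the ingredient the abstract credits for near-Bayesian performance. A sketch on a stable Bernoulli stream; the constants eta and w_prior are illustrative, not the paper's fitted values:

```python
import numpy as np

rng = np.random.default_rng(0)

def exp_predict(x, eta=0.2, prior=0.5, w_prior=0.25):
    """EXP-style predictor: a leaky average of past 0/1 outcomes blended
    with a persistent prior."""
    p, preds = prior, []
    for obs in x:
        preds.append(w_prior * prior + (1 - w_prior) * p)
        p = (1 - eta) * p + eta * obs   # leaky integration of the stream
    return np.array(preds)

x = (rng.uniform(size=200) < 0.7).astype(float)  # stable Bernoulli(0.7) stream
preds = exp_predict(x)                           # hovers near 0.7, biased to prior
```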
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Demystifying excessively volatile human learning: A Bayesian persistent prior and a neural approximation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/78b9cab19959e4af8ff46156ee460c74-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/78b9cab19959e4af8ff46156ee460c74-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/78b9cab19959e4af8ff46156ee460c74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/78b9cab19959e4af8ff46156ee460c74-Reviews.html", "metareview": "", "pdf_size": 522882, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9126944556603369442&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science and Engineering, University of California San Diego; Department of Physics, University of California San Diego; Department of Cognitive Science, University of California San Diego", "aff_domain": "eng.ucsd.edu;physics.ucsd.edu;ucsd.edu", "email": "eng.ucsd.edu;physics.ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/78b9cab19959e4af8ff46156ee460c74-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Dendritic cortical microcircuits approximate the backpropagation algorithm", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11833", "id": "11833", "author_site": "Jo\u00e3o Sacramento, Rui Ponte Costa, Yoshua Bengio, Walter Senn", "author": "Jo\u00e3o Sacramento; Rui Ponte Costa; Yoshua Bengio; Walter Senn", "abstract": "Deep learning has seen remarkable developments over the last years, many of them inspired by neuroscience. However, the main learning mechanism behind these advances \u2013 error backpropagation \u2013 appears to be at odds with neurobiology. Here, we introduce a multilayer neuronal network model with simplified dendritic compartments in which error-driven synaptic plasticity adapts the network towards a global desired output. In contrast to previous work our model does not require separate phases and synaptic learning is driven by local dendritic prediction errors continuously in time. Such errors originate at apical dendrites and occur due to a mismatch between predictive input from lateral interneurons and activity from actual top-down feedback. Through the use of simple dendritic compartments and different cell-types our model can represent both error and normal activity within a pyramidal neuron. We demonstrate the learning capabilities of the model in regression and classification tasks, and show analytically that it approximates the error backpropagation algorithm. Moreover, our framework is consistent with recent observations of learning between brain areas and the architecture of cortical microcircuits. 
Overall, we introduce a novel view of learning on dendritic cortical circuits and on how the brain may solve the long-standing synaptic credit assignment problem.", "bibtex": "@inproceedings{NEURIPS2018_1dc3a89d,\n author = {Sacramento, Jo\\~{a}o and Ponte Costa, Rui and Bengio, Yoshua and Senn, Walter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dendritic cortical microcircuits approximate the backpropagation algorithm},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1dc3a89d0d440ba31729b0ba74b93a33-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1dc3a89d0d440ba31729b0ba74b93a33-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1dc3a89d0d440ba31729b0ba74b93a33-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1dc3a89d0d440ba31729b0ba74b93a33-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1dc3a89d0d440ba31729b0ba74b93a33-Reviews.html", "metareview": "", "pdf_size": 807955, "gs_citation": 395, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1228666072575338509&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Physiology, University of Bern, Switzerland + Institute of Neuroinformatics, University of Z\u00fcrich and ETH Z\u00fcrich, Z\u00fcrich, Switzerland; Department of Physiology, University of Bern, Switzerland + Computational Neuroscience Unit, Department of Computer Science, SCEEM, Faculty of Engineering, University of Bristol, United Kingdom; Mila and Universit\u00e9 de Montr\u00e9al, Canada; Department of Physiology, University of Bern, Switzerland", "aff_domain": "pyl.unibe.ch;pyl.unibe.ch;mila.quebec;pyl.unibe.ch", "email": "pyl.unibe.ch;pyl.unibe.ch;mila.quebec;pyl.unibe.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1dc3a89d0d440ba31729b0ba74b93a33-Abstract.html", "aff_unique_index": "0+1;0+2;3;0", "aff_unique_norm": "University of Bern;University of Z\u00fcrich;University of Bristol;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": "Department of Physiology;Institute of Neuroinformatics;Department of Computer Science;Mila", "aff_unique_url": "https://www.unibe.ch;https://www.neuro.ethz.ch/;https://www.bristol.ac.uk;https://www.umontreal.ca", "aff_unique_abbr": "UniBE;UZH;Bristol;UdeM", "aff_campus_unique_index": "1;;2", "aff_campus_unique": ";Z\u00fcrich;Montr\u00e9al", "aff_country_unique_index": "0+0;0+1;2;0", "aff_country_unique": "Switzerland;United Kingdom;Canada" }, { "title": "Densely Connected Attention Propagation for Reading Comprehension", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11481", "id": "11481", "author_site": "Yi Tay, Anh Tuan Luu, Siu Cheung Hui, Jian Su", "author": "Yi Tay; Anh Tuan Luu; Siu Cheung Hui; Jian Su", "abstract": "We propose DecaProp (Densely Connected Attention Propagation), a new densely connected neural architecture for reading comprehension (RC). There are two distinct characteristics of our model. Firstly, our model densely connects all pairwise layers of the network, modeling relationships between passage and query across all hierarchical levels. 
Secondly, the dense connectors in our network are learned via attention instead of standard residual skip-connectors. To this end, we propose novel Bidirectional Attention Connectors (BAC) for efficiently forging connections throughout the network. We conduct extensive experiments on four challenging RC benchmarks. Our proposed approach achieves state-of-the-art results on all four, outperforming existing baselines by 2.6% to 14.2% in absolute F1 score.", "bibtex": "@inproceedings{NEURIPS2018_7b66b4fd,\n author = {Tay, Yi and Luu, Anh Tuan and Hui, Siu Cheung and Su, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Densely Connected Attention Propagation for Reading Comprehension},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7b66b4fd401a271a1c7224027ce111bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7b66b4fd401a271a1c7224027ce111bc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7b66b4fd401a271a1c7224027ce111bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7b66b4fd401a271a1c7224027ce111bc-Reviews.html", "metareview": "", "pdf_size": 832203, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2530758848845000975&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Nanyang Technological University, Singapore; Institute for Infocomm Research, Singapore; Nanyang Technological University, Singapore; Institute for Infocomm Research, Singapore", "aff_domain": "e.ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu.sg;i2r.a-star.edu.sg", "email": "e.ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu.sg;i2r.a-star.edu.sg", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7b66b4fd401a271a1c7224027ce111bc-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Nanyang Technological University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NTU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "title": "Depth-Limited Solving for Imperfect-Information Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11736", "id": "11736", "author_site": "Noam Brown, Tuomas Sandholm, Brandon Amos", "author": "Noam Brown; Tuomas Sandholm; Brandon Amos", "abstract": "A fundamental challenge in imperfect-information games is that states do not have well-defined values. As a result, depth-limited search algorithms used in single-agent settings and perfect-information games do not apply. This paper introduces a principled way to conduct depth-limited solving in imperfect-information games by allowing the opponent to choose among a number of strategies for the remainder of the game at the depth limit. Each one of these strategies results in a different set of values for leaf nodes. This forces an agent to be robust to the different strategies an opponent may employ. We demonstrate the effectiveness of this approach by building a master-level heads-up no-limit Texas hold'em poker AI that defeats two prior top agents using only a 4-core CPU and 16 GB of memory. 
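The depth-limited evaluation idea above can be made concrete with toy numbers (this is only the leaf-evaluation step, not the paper's full equilibrium-solving algorithm; payoffs and names are invented for illustration): each depth-limit leaf stores one value per opponent continuation strategy, and our strategy is evaluated against whichever continuation is worst for us.

```python
# leaf_values[leaf][k]: our payoff at that leaf if the opponent plays
# continuation strategy k for the remainder of the game (toy numbers).
leaf_values = {"A": [1.0, -0.5, 0.2], "B": [0.3, 0.4, -1.0]}

def robust_value(reach):
    """Our value when the opponent, at the depth limit, picks the
    continuation strategy that is worst for us, given the probability
    reach[leaf] of arriving at each depth-limit leaf."""
    n_strats = len(next(iter(leaf_values.values())))
    return min(
        sum(reach[leaf] * vals[k] for leaf, vals in leaf_values.items())
        for k in range(n_strats)
    )

print(robust_value({"A": 0.5, "B": 0.5}))  # -0.4: opponent picks continuation 2
print(robust_value({"A": 1.0, "B": 0.0}))  # -0.5: opponent picks continuation 1
```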
Developing such a powerful agent would have previously required a supercomputer.", "bibtex": "@inproceedings{NEURIPS2018_34306d99,\n author = {Brown, Noam and Sandholm, Tuomas and Amos, Brandon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Depth-Limited Solving for Imperfect-Information Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/34306d99c63613fad5b2a140398c0420-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/34306d99c63613fad5b2a140398c0420-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/34306d99c63613fad5b2a140398c0420-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/34306d99c63613fad5b2a140398c0420-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/34306d99c63613fad5b2a140398c0420-Reviews.html", "metareview": "", "pdf_size": 1126186, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11136137899306715730&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Computer Science Department, Carnegie Mellon University; Computer Science Department, Carnegie Mellon University; Computer Science Department, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/34306d99c63613fad5b2a140398c0420-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Derivative Estimation in Random Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11346", "id": "11346", "author_site": "Yu Liu, Kris De Brabanter", "author": "Yu Liu; Kris De Brabanter", "abstract": "We propose a nonparametric derivative estimation method for random design without having to estimate the regression function. The method is based on a variance-reducing linear combination of symmetric difference quotients. First, we discuss the special case of uniform random design and establish the estimator\u2019s asymptotic properties. Second, we generalize these results for any distribution of the independent variable and compare the proposed estimator with popular estimators for derivative estimation such as local polynomial regression and smoothing splines.", "bibtex": "@inproceedings{NEURIPS2018_db29450c,\n author = {Liu, Yu and De Brabanter, Kris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Derivative Estimation in Random Design},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/db29450c3f5e97f97846693611f98c15-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/db29450c3f5e97f97846693611f98c15-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/db29450c3f5e97f97846693611f98c15-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/db29450c3f5e97f97846693611f98c15-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/db29450c3f5e97f97846693611f98c15-Reviews.html", "metareview": "", "pdf_size": 328001, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18181681217452627761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Computer Science; Department of Computer Science + Department of Statistics", "aff_domain": "iastate.edu;iastate.edu", "email": "iastate.edu;iastate.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/db29450c3f5e97f97846693611f98c15-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Unknown Institution;University Affiliation Not Specified", "aff_unique_dep": "Department of Computer Science;Department of Statistics", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Designing by Training: Acceleration Neural Network for Fast High-Dimensional Convolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11162", "id": "11162", "author_site": "Longquan Dai, Liang Tang, Yuan Xie, Jinhui Tang", "author": "Longquan Dai; Liang Tang; Yuan Xie; Jinhui Tang", "abstract": "The high-dimensional convolution is widely used in various disciplines but has a serious performance problem due to its high computational complexity. Over the decades, people took a handmade approach to design fast algorithms for the Gaussian convolution. Recently, requirements for various non-Gaussian convolutions have emerged and are continuously getting higher. However, the handmade acceleration approach is no longer feasible for so many different convolutions since it is a time-consuming and painstaking job. Instead, we propose an Acceleration Network (AccNet) which turns the work of designing new fast algorithms to training the AccNet. This is done by: 1, interpreting splatting, blurring, slicing operations as convolutions; 2, turning these convolutions to $g$CP layers to build AccNet. After training, the activation function $g$ together with AccNet weights automatically define the new splatting, blurring and slicing operations. Experiments demonstrate AccNet is able to design acceleration algorithms for a ton of convolutions including Gaussian/non-Gaussian convolutions and produce state-of-the-art results.", "bibtex": "@inproceedings{NEURIPS2018_470e7a4f,\n author = {Dai, Longquan and Tang, Liang and Xie, Yuan and Tang, Jinhui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Designing by Training: Acceleration Neural Network for Fast High-Dimensional Convolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/470e7a4f017a5476afb7eeb3f8b96f9b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/470e7a4f017a5476afb7eeb3f8b96f9b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/470e7a4f017a5476afb7eeb3f8b96f9b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/470e7a4f017a5476afb7eeb3f8b96f9b-Reviews.html", "metareview": "", "pdf_size": 7296370, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17201625474620884482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff": "School of Computer Science and Engineering, Nanjing University of Science and Technology; CASA Environmental Technology Co., Ltd + CASA EM&EW IOT Research Center; Institute of Automation, Chinese Academy of Sciences; School of Computer Science and Engineering, Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;casaet.com;ia.ac.cn;njust.edu.cn", "email": "njust.edu.cn;casaet.com;ia.ac.cn;njust.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/470e7a4f017a5476afb7eeb3f8b96f9b-Abstract.html", "aff_unique_index": "0;1+2;3;0", "aff_unique_norm": "Nanjing University of Science and Technology;CASA Environmental Technology Co., Ltd;CASA EM&EW IOT Research Center;Chinese Academy of Sciences", "aff_unique_dep": "School of Computer Science and Engineering;;IOT Research Center;Institute of Automation", "aff_unique_url": "http://www.nust.edu.cn;;;http://www.ia.cas.cn", "aff_unique_abbr": "NUST;;;CAS", "aff_campus_unique_index": "0;;0", "aff_campus_unique": "Nanjing;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Dialog-based Interactive Image Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11090", "id": "11090", "author_site": "Xiaoxiao Guo, Hui Wu, Yu Cheng, Steven Rennie, Gerald Tesauro, Rogerio Feris", "author": "Xiaoxiao Guo; Hui Wu; Yu Cheng; Steven Rennie; Gerald Tesauro; Rogerio Feris", "abstract": "Existing methods for interactive image retrieval have demonstrated the merit of integrating user feedback, improving retrieval results. However, most current systems rely on restricted forms of user feedback, such as binary relevance responses, or feedback based on a fixed set of relative attributes, which limits their impact. In this paper, we introduce a new approach to interactive image search that enables users to provide feedback via natural language, allowing for more natural and effective interaction. We formulate the task of dialog-based interactive image retrieval as a reinforcement learning problem, and reward the dialog system for improving the rank of the target image during each dialog turn. To mitigate the cumbersome and costly process of collecting human-machine conversations as the dialog system learns, we train our system with a user simulator, which is itself trained to describe the differences between target and candidate images. The efficacy of our approach is demonstrated in a footwear retrieval application. 
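The per-turn reward described above (rank improvement of the target image) is easy to state in code. A hedged sketch, assuming a simple "rank before minus rank after" shaping; the paper's exact reward may differ:

```python
import numpy as np

def rank_of_target(scores, target_idx):
    """Rank (1 = best) of the target among candidate similarity scores."""
    order = np.argsort(-np.asarray(scores))
    return int(np.where(order == target_idx)[0][0]) + 1

def turn_reward(scores_before, scores_after, target_idx):
    # Positive when the user's feedback moved the target up the ranking.
    return rank_of_target(scores_before, target_idx) - rank_of_target(scores_after, target_idx)

# Target (index 2) moves from rank 3 to rank 1 after one dialog turn: reward +2.
print(turn_reward([0.9, 0.8, 0.5], [0.2, 0.3, 0.9], target_idx=2))
```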
Experiments on both simulated and real-world data show that 1) our proposed learning framework achieves better accuracy than other supervised and reinforcement learning baselines and 2) user feedback based on natural language rather than pre-specified attributes leads to more effective retrieval results, and a more natural and expressive communication interface.", "bibtex": "@inproceedings{NEURIPS2018_a01a0380,\n author = {Guo, Xiaoxiao and Wu, Hui and Cheng, Yu and Rennie, Steven and Tesauro, Gerald and Feris, Rogerio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dialog-based Interactive Image Retrieval},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a01a0380ca3c61428c26a231f0e49a09-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a01a0380ca3c61428c26a231f0e49a09-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a01a0380ca3c61428c26a231f0e49a09-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a01a0380ca3c61428c26a231f0e49a09-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a01a0380ca3c61428c26a231f0e49a09-Reviews.html", "metareview": "", "pdf_size": 2130547, "gs_citation": 230, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4258300372823907612&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "IBM Research AI; IBM Research AI; IBM Research AI; Fusemachines Inc.; IBM Research AI; IBM Research AI", "aff_domain": "ibm.com;us.ibm.com;us.ibm.com;gmail.com;us.ibm.com;us.ibm.com", "email": "ibm.com;us.ibm.com;us.ibm.com;gmail.com;us.ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a01a0380ca3c61428c26a231f0e49a09-Abstract.html", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "IBM;Fusemachines", "aff_unique_dep": "AI;", "aff_unique_url": "https://www.ibm.com/research;https://fusemachines.com", "aff_unique_abbr": "IBM;Fusemachines", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dialog-to-Action: Conversational Question Answering Over a Large-Scale Knowledge Base", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11300", "id": "11300", "author_site": "Daya Guo, Duyu Tang, Nan Duan, Ming Zhou, Jian Yin", "author": "Daya Guo; Duyu Tang; Nan Duan; Ming Zhou; Jian Yin", "abstract": "We present an approach to map utterances in conversation to logical forms, which will be executed on a large-scale knowledge base. To handle enormous ellipsis phenomena in conversation, we introduce dialog memory management to manipulate historical entities, predicates, and logical forms when inferring the logical form of current utterances. Dialog memory management is embodied in a generative model, in which a logical form is interpreted in a top-down manner following a small and flexible grammar. We learn the model from denotations without explicit annotation of logical forms, and evaluate it on a large-scale dataset consisting of 200K dialogs over 12.8M entities. 
Results verify the benefits of modeling dialog memory, and show that our semantic parsing-based approach outperforms a memory-network-based encoder-decoder model by a huge margin.", "bibtex": "@inproceedings{NEURIPS2018_d63fbf8c,\n author = {Guo, Daya and Tang, Duyu and Duan, Nan and Zhou, Ming and Yin, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dialog-to-Action: Conversational Question Answering Over a Large-Scale Knowledge Base},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d63fbf8c3173730f82b150c5ef38b8ff-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d63fbf8c3173730f82b150c5ef38b8ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d63fbf8c3173730f82b150c5ef38b8ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d63fbf8c3173730f82b150c5ef38b8ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d63fbf8c3173730f82b150c5ef38b8ff-Reviews.html", "metareview": "", "pdf_size": 701568, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4050957247933261039&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "The School of Data and Computer Science, Sun Yat-sen University + Guangdong Key Laboratory of Big Data Analysis and Processing, Guangzhou, P.R.China; Microsoft Research Asia, Beijing, China; Microsoft Research Asia, Beijing, China; Microsoft Research Asia, Beijing, China; The School of Data and Computer Science, Sun Yat-sen University + Guangdong Key Laboratory of Big Data Analysis and Processing, Guangzhou, P.R.China", "aff_domain": "mail2.sysu.edu.cn;microsoft.com;microsoft.com;microsoft.com;mail.sysu.edu.cn", "email": "mail2.sysu.edu.cn;microsoft.com;microsoft.com;microsoft.com;mail.sysu.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d63fbf8c3173730f82b150c5ef38b8ff-Abstract.html", "aff_unique_index": "0+1;2;2;2;0+1", "aff_unique_norm": "Sun Yat-sen University;Guangdong Key Laboratory of Big Data Analysis and Processing;Microsoft", "aff_unique_dep": "School of Data and Computer Science;;Research", "aff_unique_url": "http://www.sysu.edu.cn/;;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "SYSU;;MSRA", "aff_campus_unique_index": "1;2;2;2;1", "aff_campus_unique": ";Guangzhou;Beijing", "aff_country_unique_index": "0+0;0;0;0;0+0", "aff_country_unique": "China" }, { "title": "DifNet: Semantic Segmentation by Diffusion Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11177", "id": "11177", "author_site": "Peng Jiang, Fanglin Gu, Yunhai Wang, Changhe Tu, Baoquan Chen", "author": "Peng Jiang; Fanglin Gu; Yunhai Wang; Changhe Tu; Baoquan Chen", "abstract": "Deep Neural Networks (DNNs) have recently shown state-of-the-art performance on semantic segmentation tasks; however, they still suffer from poor boundary localization and spatially fragmented predictions. The difficulty lies in the requirement of making dense predictions from a long-path model all at once, since details are hard to preserve as data passes through deeper layers. 
Instead, in this work, we decompose this difficult task into two relatively simple sub-tasks: seed detection, which produces initial predictions without the need for wholeness or preciseness, and similarity estimation, which measures the possibility that any two nodes belong to the same class without needing to know which class they are. We use one branch network for each sub-task, and apply a cascade of random walks based on hierarchical semantics to approximate a complex diffusion process which propagates seed information to the whole image according to the estimated similarities. \nThe proposed DifNet consistently produces improvements over the baseline models with the same depth and an equivalent number of parameters, and also achieves promising performance on the Pascal VOC and Pascal Context datasets. Our DifNet is trained end-to-end without complex loss functions.", "bibtex": "@inproceedings{NEURIPS2018_c2626d85,\n author = {Jiang, Peng and Gu, Fanglin and Wang, Yunhai and Tu, Changhe and Chen, Baoquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DifNet: Semantic Segmentation by Diffusion Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c2626d850c80ea07e7511bbae4c76f4b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c2626d850c80ea07e7511bbae4c76f4b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c2626d850c80ea07e7511bbae4c76f4b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c2626d850c80ea07e7511bbae4c76f4b-Reviews.html", "metareview": "", "pdf_size": 750098, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13232237255842510854&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Shandong University, China; Shandong University, China; Shandong University, China; Shandong University, China+Peking University, China; Peking University, China", "aff_domain": "gmail.com;gmail.com;gmail.com;sdu.edu.cn;gmail.com", "email": "gmail.com;gmail.com;gmail.com;sdu.edu.cn;gmail.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c2626d850c80ea07e7511bbae4c76f4b-Abstract.html", "aff_unique_index": "0;0;0;0+1;1", "aff_unique_norm": "Shandong University;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.sdu.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "SDU;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0+0;0", "aff_country_unique": "China" }, { "title": "Differentiable MPC for End-to-end Planning and Control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11793", "id": "11793", "author_site": "Brandon Amos, Ivan Jimenez, Jacob I Sacks, Byron Boots, J. Zico Kolter", "author": "Brandon Amos; Ivan Jimenez; Jacob Sacks; Byron Boots; J. Zico Kolter", "abstract": "We present foundations for using Model Predictive Control (MPC) as a differentiable policy class for reinforcement learning. This provides one way of leveraging and combining the advantages of model-free and model-based approaches. Specifically, we differentiate through MPC by using the KKT conditions of the convex approximation at a fixed point of the controller. 
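The KKT-differentiation step just mentioned is easiest to see on an equality-constrained QP, a much simpler convex problem than the paper's box-constrained LQR setting but the same implicit-function idea; all matrices below are illustrative. The optimizer's fixed point satisfies a single linear KKT system, and differentiating that system gives gradients of the solution with respect to cost parameters at the price of one extra linear solve.

```python
import numpy as np

# minimize 0.5 z'Qz + c'z  subject to  A z = b
Q = np.array([[3.0, 0.5], [0.5, 2.0]])
c = np.array([1.0, -1.0])
A = np.array([[1.0, 1.0]])
b = np.array([1.0])

# Stationarity and primal feasibility form one linear KKT system:
#   [Q  A'] [z ]   [-c]
#   [A  0 ] [nu] = [ b]
K = np.block([[Q, A.T], [A, np.zeros((1, 1))]])
sol = np.linalg.solve(K, np.concatenate([-c, b]))
z = sol[:2]

# Differentiating the KKT system w.r.t. c gives K d[z; nu]/dc = [-I; 0],
# so the Jacobian of the solution falls out of one more linear solve.
rhs = np.vstack([-np.eye(2), np.zeros((1, 2))])
dz_dc = np.linalg.solve(K, rhs)[:2]
print(z, dz_dc, sep="\n")
```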
Using this strategy, we are able to learn the cost and dynamics of a controller via end-to-end learning. Our experiments focus on imitation learning in the pendulum and cartpole domains, where we learn the cost and dynamics terms of an MPC policy class. We show that our MPC policies are significantly more data-efficient than a generic neural network and that our method is superior to traditional system identification in a setting where the expert is unrealizable.", "bibtex": "@inproceedings{NEURIPS2018_ba6d843e,\n author = {Amos, Brandon and Jimenez, Ivan and Sacks, Jacob and Boots, Byron and Kolter, J. Zico},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentiable MPC for End-to-end Planning and Control},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ba6d843eb4251a4526ce65d1807a9309-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ba6d843eb4251a4526ce65d1807a9309-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ba6d843eb4251a4526ce65d1807a9309-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ba6d843eb4251a4526ce65d1807a9309-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ba6d843eb4251a4526ce65d1807a9309-Reviews.html", "metareview": "", "pdf_size": 448680, "gs_citation": 493, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14843462917652881335&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Georgia Tech; Georgia Tech; Georgia Tech; Carnegie Mellon University + Bosch Center for AI", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ba6d843eb4251a4526ce65d1807a9309-Abstract.html", "aff_unique_index": "0;1;1;1;0+2", "aff_unique_norm": "Carnegie Mellon University;Georgia Institute of Technology;Bosch Center for AI", "aff_unique_dep": ";;Center for AI", "aff_unique_url": "https://www.cmu.edu;https://www.gatech.edu;https://www.bosch-ai.com", "aff_unique_abbr": "CMU;Georgia Tech;BCAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+1", "aff_country_unique": "United States;Germany" }, { "title": "Differential Privacy for Growing Databases", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11846", "id": "11846", "author_site": "Rachel Cummings, Sara Krehbiel, Kevin A Lai, Uthaipon Tantipongpipat", "author": "Rachel Cummings; Sara Krehbiel; Kevin A Lai; Uthaipon Tantipongpipat", "abstract": "The large majority of differentially private algorithms focus on the static setting, where queries are made on an unchanging database. This is unsuitable for the myriad applications involving databases that grow over time. To address this gap in the literature, we consider the dynamic setting, in which new data arrive over time. Previous results in this setting have been limited to answering a single non-adaptive query repeatedly as the database grows. In contrast, we provide tools for richer and more adaptive analysis of growing databases. 
Our first contribution is a novel modification of the private multiplicative weights algorithm, which provides accurate analysis of exponentially many adaptive linear queries (an expressive query class including all counting queries) for a static database. Our modification maintains the accuracy guarantee of the static setting even as the database grows without bound. Our second contribution is a set of general results which show that many other private and accurate algorithms can be immediately extended to the dynamic setting by rerunning them at appropriate points of data growth with minimal loss of accuracy, even when data growth is unbounded.", "bibtex": "@inproceedings{NEURIPS2018_ac27b772,\n author = {Cummings, Rachel and Krehbiel, Sara and Lai, Kevin A and Tantipongpipat, Uthaipon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differential Privacy for Growing Databases},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ac27b77292582bc293a51055bfc994ee-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ac27b77292582bc293a51055bfc994ee-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ac27b77292582bc293a51055bfc994ee-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ac27b77292582bc293a51055bfc994ee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ac27b77292582bc293a51055bfc994ee-Reviews.html", "metareview": "", "pdf_size": 454053, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10685326186347705049&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Georgia Institute of Technology; University of Richmond; Georgia Institute of Technology; Georgia Institute of Technology", "aff_domain": "gatech.edu;richmond.edu;gatech.edu;gatech.edu", "email": "gatech.edu;richmond.edu;gatech.edu;gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ac27b77292582bc293a51055bfc994ee-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Georgia Institute of Technology;University of Richmond", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.richmond.edu", "aff_unique_abbr": "Georgia Tech;UR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differential Properties of Sinkhorn Approximation for Learning with Wasserstein Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11570", "id": "11570", "author_site": "Giulia Luise, Alessandro Rudi, Massimiliano Pontil, Carlo Ciliberto", "author": "Giulia Luise; Alessandro Rudi; Massimiliano Pontil; Carlo Ciliberto", "abstract": "Applications of optimal transport have recently gained remarkable attention as a result of the computational advantages of entropic regularization. However, in most situations the Sinkhorn approximation to the Wasserstein distance is replaced by a regularized version that is less accurate but easy to differentiate. 
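For orientation before the results that follow, here is the textbook Sinkhorn iteration in a few lines (a generic sketch, not the paper's code). It returns the transport plan P from which both quantities at issue can be read: the regularized objective includes an entropy term and is easy to differentiate, while the sharp Sinkhorn approximation is the plain transport cost <P, C>.

```python
import numpy as np

def sinkhorn_plan(a, b, C, eps=0.05, iters=500):
    """Entropic OT between histograms a and b with cost matrix C."""
    K = np.exp(-C / eps)
    u = np.ones_like(a)
    for _ in range(iters):          # alternate row/column matrix scalings
        v = b / (K.T @ u)
        u = a / (K @ v)
    return u[:, None] * K * v[None, :]  # transport plan P with marginals a, b

a = np.array([0.5, 0.5])
b = np.array([0.25, 0.75])
C = np.array([[0.0, 1.0], [1.0, 0.0]])
P = sinkhorn_plan(a, b, C)
print("sharp Sinkhorn cost:", np.sum(P * C))  # close to the true OT cost 0.25
```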
In this work we characterize the differential properties of the original Sinkhorn approximation, proving that it enjoys the same smoothness as its regularized version, and we explicitly provide an efficient algorithm to compute its gradient. We show that this result benefits both theory and applications: on one hand, high-order smoothness confers statistical guarantees to learning with Wasserstein approximations. On the other hand, the gradient formula allows us to efficiently solve learning and optimization problems in practice. Promising preliminary experiments complement our analysis.", "bibtex": "@inproceedings{NEURIPS2018_3fc2c60b,\n author = {Luise, Giulia and Rudi, Alessandro and Pontil, Massimiliano and Ciliberto, Carlo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differential Properties of Sinkhorn Approximation for Learning with Wasserstein Distance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3fc2c60b5782f641f76bcefc39fb2392-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3fc2c60b5782f641f76bcefc39fb2392-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3fc2c60b5782f641f76bcefc39fb2392-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3fc2c60b5782f641f76bcefc39fb2392-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3fc2c60b5782f641f76bcefc39fb2392-Reviews.html", "metareview": "", "pdf_size": 676497, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=436330101781594143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "Department of Computer Science, University College London, London, UK + Istituto Italiano di Tecnologia, Genova, Italy; INRIA - D\u00e9partement d\u2019informatique, \u00c9cole Normale Sup\u00e9rieure - PSL Research University, Paris, France; Department of Computer Science, University College London, London, UK + Istituto Italiano di Tecnologia, Genova, Italy; Department of Computer Science, University College London, London, UK + Department of Electrical and Electronic Engineering, Imperial College, London, UK", "aff_domain": "ucl.ac.uk;inria.fr;ucl.ac.uk;imperial.ac.uk", "email": "ucl.ac.uk;inria.fr;ucl.ac.uk;imperial.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3fc2c60b5782f641f76bcefc39fb2392-Abstract.html", "aff_unique_index": "0+1;2;0+1;0+3", "aff_unique_norm": "University College London;Istituto Italiano di Tecnologia;INRIA;Imperial College London", "aff_unique_dep": "Department of Computer Science;;D\u00e9partement d\u2019informatique;Department of Electrical and Electronic Engineering", "aff_unique_url": "https://www.ucl.ac.uk;https://www.iit.it;https://www.inria.fr;https://www.imperial.ac.uk", "aff_unique_abbr": "UCL;IIT;INRIA;ICL", "aff_campus_unique_index": "0+1;2;0+1;0+0", "aff_campus_unique": "London;Genova;Paris", "aff_country_unique_index": "0+1;2;0+1;0+0", "aff_country_unique": "United Kingdom;Italy;France" }, { "title": "Differentially Private Bayesian Inference for Exponential Families", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11298", "id": "11298", "author_site": "Garrett Bernstein, Daniel Sheldon", "author": "Garrett Bernstein; Daniel R. 
Sheldon", "abstract": "The study of private inference has been sparked by growing concern regarding the analysis of data when it stems from sensitive sources. We present the first method for private Bayesian inference in exponential families that properly accounts for noise introduced by the privacy mechanism. It is efficient because it works only with sufficient statistics and not individual data. Unlike other methods, it gives properly calibrated posterior beliefs in the non-asymptotic data regime.", "bibtex": "@inproceedings{NEURIPS2018_08040837,\n author = {Bernstein, Garrett and Sheldon, Daniel R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Bayesian Inference for Exponential Families},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08040837089cdf46631a10aca5258e16-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08040837089cdf46631a10aca5258e16-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/08040837089cdf46631a10aca5258e16-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08040837089cdf46631a10aca5258e16-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08040837089cdf46631a10aca5258e16-Reviews.html", "metareview": "", "pdf_size": 3349557, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15594098166220760665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08040837089cdf46631a10aca5258e16-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "College of Information and Computer Sciences", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Change-Point Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12023", "id": "12023", "author_site": "Rachel Cummings, Sara Krehbiel, Yajun Mei, Rui Tuo, Wanrong Zhang", "author": "Rachel Cummings; Sara Krehbiel; Yajun Mei; Rui Tuo; Wanrong Zhang", "abstract": "The change-point detection problem seeks to identify distributional changes at an unknown change-point k* in a stream of data. This problem appears in many important practical settings involving personal data, including biosurveillance, fault detection, finance, signal detection, and security systems. The field of differential privacy offers data analysis tools that provide powerful worst-case privacy guarantees. We study the statistical problem of change-point problem through the lens of differential privacy. 
We give private algorithms for both online and offline change-point detection, analyze these algorithms theoretically, and then provide empirical validation of these results.", "bibtex": "@inproceedings{NEURIPS2018_f19ec2b8,\n author = {Cummings, Rachel and Krehbiel, Sara and Mei, Yajun and Tuo, Rui and Zhang, Wanrong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Change-Point Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f19ec2b84181033bf4753a5a51d5d608-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f19ec2b84181033bf4753a5a51d5d608-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f19ec2b84181033bf4753a5a51d5d608-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f19ec2b84181033bf4753a5a51d5d608-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f19ec2b84181033bf4753a5a51d5d608-Reviews.html", "metareview": "", "pdf_size": 511424, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16657994829808010136&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Georgia Institute of Technology; University of Richmond; Georgia Institute of Technology; Texas A&M University; Georgia Institute of Technology", "aff_domain": "gatech.edu;richmond.edu;gatech.edu;tamu.edu;gatech.edu", "email": "gatech.edu;richmond.edu;gatech.edu;tamu.edu;gatech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f19ec2b84181033bf4753a5a51d5d608-Abstract.html", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Georgia Institute of Technology;University of Richmond;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.richmond.edu;https://www.tamu.edu", "aff_unique_abbr": "Georgia Tech;UR;TAMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Contextual Linear Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11425", "id": "11425", "author_site": "Roshan Shariff, Or Sheffet", "author": "Roshan Shariff; Or Sheffet", "abstract": "We study the contextual linear bandit problem, a version of the standard stochastic multi-armed bandit (MAB) problem where a learner sequentially selects actions to maximize a reward which depends also on a user provided per-round context. Though the context is chosen arbitrarily or adversarially, the reward is assumed to be a stochastic function of a feature vector that encodes the context and selected action. Our goal is to devise private learners for the contextual linear bandit problem.", "bibtex": "@inproceedings{NEURIPS2018_a1d7311f,\n author = {Shariff, Roshan and Sheffet, Or},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Contextual Linear Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a1d7311f2a312426d710e1c617fcbc8c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a1d7311f2a312426d710e1c617fcbc8c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a1d7311f2a312426d710e1c617fcbc8c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a1d7311f2a312426d710e1c617fcbc8c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a1d7311f2a312426d710e1c617fcbc8c-Reviews.html", "metareview": "", "pdf_size": 320240, "gs_citation": 141, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2146888961390448405&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computing Science, University of Alberta; Department of Computing Science, University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca", "email": "ualberta.ca;ualberta.ca", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a1d7311f2a312426d710e1c617fcbc8c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "Department of Computing Science", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Differentially Private Robust Low-Rank Approximation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11410", "id": "11410", "author_site": "Raman Arora, Vladimir Braverman, Jalaj Upadhyay", "author": "Raman Arora; Vladimir braverman; Jalaj Upadhyay", "abstract": "In this paper, we study the following robust low-rank matrix approximation problem: given a matrix $A \\in \\R^{n \\times d}$, find a rank-$k$ matrix $B$, while satisfying differential privacy, such that \n$ \\norm{ A - B }_p \\leq \\alpha \\mathsf{OPT}_k(A) + \\tau,$ where \n$\\norm{ M }_p$ is the entry-wise $\\ell_p$-norm \nand $\\mathsf{OPT}_k(A):=\\min_{\\mathsf{rank}(X) \\leq k} \\norm{ A - X}_p$. \nIt is well known that low-rank approximation w.r.t. entrywise $\\ell_p$-norm, for $p \\in [1,2)$, yields robustness to gross outliers in the data. We propose an algorithm that guarantees $\\alpha=\\widetilde{O}(k^2), \\tau=\\widetilde{O}(k^2(n+kd)/\\varepsilon)$, runs in $\\widetilde O((n+d)\\poly~k)$ time and uses $O(k(n+d)\\log k)$ space. We study extensions to the streaming setting where entries of the matrix arrive in an arbitrary order and output is produced at the very end or continually. We also study the related problem of differentially private robust principal component analysis (PCA), wherein we return a rank-$k$ projection matrix $\\Pi$ such that $\\norm{ A - A \\Pi }_p \\leq \\alpha \\mathsf{OPT}_k(A) + \\tau.$", "bibtex": "@inproceedings{NEURIPS2018_a9813e95,\n author = {Arora, Raman and braverman, Vladimir and Upadhyay, Jalaj},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Robust Low-Rank Approximation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a9813e9550fee3110373c21fa012eee7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a9813e9550fee3110373c21fa012eee7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a9813e9550fee3110373c21fa012eee7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a9813e9550fee3110373c21fa012eee7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a9813e9550fee3110373c21fa012eee7-Reviews.html", "metareview": "", "pdf_size": 592589, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13331359236864766199&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Johns Hopkins University; Johns Hopkins University; Johns Hopkins University", "aff_domain": "cs.jhu.edu;cs.jhu.edu;jhu.edu", "email": "cs.jhu.edu;cs.jhu.edu;jhu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a9813e9550fee3110373c21fa012eee7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Testing of Identity and Closeness of Discrete Distributions", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11663", "id": "11663", "author_site": "Jayadev Acharya, Ziteng Sun, Huanyu Zhang", "author": "Jayadev Acharya; Ziteng Sun; Huanyu Zhang", "abstract": "We study the fundamental problems of identity testing (goodness of fit), and closeness testing (two sample test) of distributions over $k$ elements, under differential privacy. While the problems have a long history in statistics, finite sample bounds for these problems have only been established recently. \n\nIn this work, we derive upper and lower bounds on the sample complexity of both the problems under $(\\varepsilon, \\delta)$-differential privacy. We provide optimal sample complexity algorithms for identity testing problem for all parameter ranges, and the first results for closeness testing. Our closeness testing bounds are optimal in the sparse regime where the number of samples is at most $k$. \n\nOur upper bounds are obtained by privatizing non-private estimators for these problems. The non-private estimators are chosen to have small sensitivity. We propose a general framework to establish lower bounds on the sample complexity of statistical tasks under differential privacy. We show a bound on differentially private algorithms in terms of a coupling between the two hypothesis classes we aim to test. By constructing carefully chosen priors over the hypothesis classes, and using Le Cam's two point theorem we provide a general mechanism for proving lower bounds. We believe that the framework can be used to obtain strong lower bounds for other statistical tasks under privacy.", "bibtex": "@inproceedings{NEURIPS2018_7de32147,\n author = {Acharya, Jayadev and Sun, Ziteng and Zhang, Huanyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Testing of Identity and Closeness of Discrete Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7de32147a4f1055bed9e4faf3485a84d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7de32147a4f1055bed9e4faf3485a84d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7de32147a4f1055bed9e4faf3485a84d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7de32147a4f1055bed9e4faf3485a84d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7de32147a4f1055bed9e4faf3485a84d-Reviews.html", "metareview": "", "pdf_size": 868200, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17699457646903280756&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Cornell University; Cornell University; Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7de32147a4f1055bed9e4faf3485a84d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private Uniformly Most Powerful Tests for Binomial Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11417", "id": "11417", "author_site": "Jordan Awan, Aleksandra Slavkovi\u0107", "author": "Jordan Awan; Aleksandra Slavkovi\u0107", "abstract": "We derive uniformly most powerful (UMP) tests for simple and one-sided hypotheses for a population proportion within the framework of Differential Privacy (DP), optimizing finite sample performance. We show that in general, DP hypothesis tests can be written in terms of linear constraints, and for exchangeable data can always be expressed as a function of the empirical distribution. Using this structure, we prove a \u2018Neyman-Pearson lemma\u2019 for binomial data under DP, where the DP-UMP only depends on the sample sum. Our tests can also be stated as a post-processing of a random variable, whose distribution we coin \u201cTruncated-Uniform-Laplace\u201d (Tulap), a generalization of the Staircase and discrete Laplace distributions. Furthermore, we obtain exact p-values, which are easily computed in terms of the Tulap random variable. We show that our results also apply to distribution-free hypothesis tests for continuous data. Our simulation results demonstrate that our tests have exact type I error, and are more powerful than current techniques.", "bibtex": "@inproceedings{NEURIPS2018_296472c9,\n author = {Awan, Jordan and Slavkovi\\'{c}, Aleksandra},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private Uniformly Most Powerful Tests for Binomial Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/296472c9542ad4d4788d543508116cbc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/296472c9542ad4d4788d543508116cbc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/296472c9542ad4d4788d543508116cbc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/296472c9542ad4d4788d543508116cbc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/296472c9542ad4d4788d543508116cbc-Reviews.html", "metareview": "", "pdf_size": 337747, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=852811292962275136&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Statistics, Penn State University; Department of Statistics, Penn State University", "aff_domain": "psu.edu;psu.edu", "email": "psu.edu;psu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/296472c9542ad4d4788d543508116cbc-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Penn State University", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Differentially Private k-Means with Constant Multiplicative Error", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11530", "id": "11530", "author_site": "Uri Stemmer, Haim Kaplan", "author": "Uri Stemmer; Haim Kaplan", "abstract": "We design new differentially private algorithms for the Euclidean k-means problem, both in the centralized model and in the local model of differential privacy. In both models, our algorithms achieve significantly better error guarantees than the previous state-of-the-art. In addition, in the local model, our algorithm significantly reduces the number of interaction rounds.", "bibtex": "@inproceedings{NEURIPS2018_32b991e5,\n author = {Stemmer, Uri and Kaplan, Haim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differentially Private k-Means with Constant Multiplicative Error},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/32b991e5d77ad140559ffb95522992d0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/32b991e5d77ad140559ffb95522992d0-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/32b991e5d77ad140559ffb95522992d0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/32b991e5d77ad140559ffb95522992d0-Reviews.html", "metareview": "", "pdf_size": 409569, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3085515682700771054&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff": "Tel Aviv University and Google; Ben-Gurion University + Weizmann Institute of Science", "aff_domain": "post.tau.ac.il;uri.co.il", "email": "post.tau.ac.il;uri.co.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/32b991e5d77ad140559ffb95522992d0-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "Tel Aviv University;Ben-Gurion University of the Negev;Weizmann Institute of Science", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tau.ac.il;https://www.bgu.ac.il;https://www.weizmann.org.il", "aff_unique_abbr": "TAU;BGU;Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0", "aff_country_unique": "Israel" }, { "title": "Diffusion Maps for Textual Network Embedding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11729", "id": "11729", "author_site": "Xinyuan Zhang, Yitong Li, Dinghan Shen, Lawrence Carin", "author": "Xinyuan Zhang; Yitong Li; Dinghan Shen; Lawrence Carin", "abstract": "Textual network embedding leverages rich text information associated with the network to learn low-dimensional vectorial representations of vertices.\nRather than using typical natural language processing (NLP) approaches, recent research exploits the relationship of texts on the same edge to graphically embed text. However, these models neglect to measure the complete level of connectivity between any two texts in the graph. We present diffusion maps for textual network embedding (DMTE), integrating global structural information of the graph to capture the semantic relatedness between texts, with a diffusion-convolution operation applied on the text inputs. In addition, a new objective function is designed to efficiently preserve the high-order proximity using the graph diffusion. Experimental results show that the proposed approach outperforms state-of-the-art methods on the vertex-classification and link-prediction tasks.", "bibtex": "@inproceedings{NEURIPS2018_211a7a84,\n author = {Zhang, Xinyuan and Li, Yitong and Shen, Dinghan and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Diffusion Maps for Textual Network Embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/211a7a84d3d5ce4d80347da11e0c85ed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/211a7a84d3d5ce4d80347da11e0c85ed-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/211a7a84d3d5ce4d80347da11e0c85ed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/211a7a84d3d5ce4d80347da11e0c85ed-Reviews.html", "metareview": "", "pdf_size": 556903, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13851030503517452495&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Department of Electrical and Computer Engineering, Duke University; Department of Electrical and Computer Engineering, Duke University; Department of Electrical and Computer Engineering, Duke University; Department of Electrical and Computer Engineering, Duke University", "aff_domain": "duke.edu;duke.edu;duke.edu;duke.edu", "email": "duke.edu;duke.edu;duke.edu;duke.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/211a7a84d3d5ce4d80347da11e0c85ed-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dimensionality Reduction for Stationary Time Series via Stochastic Nonconvex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11351", "id": "11351", "author_site": "Minshuo Chen, Lin Yang, Mengdi Wang, Tuo Zhao", "author": "Minshuo Chen; Lin Yang; Mengdi Wang; Tuo Zhao", "abstract": "Stochastic optimization naturally arises in machine learning. Efficient algorithms with provable guarantees, however, are still largely missing when the objective function is nonconvex and the data points are dependent. This paper studies this fundamental challenge through a streaming PCA problem for stationary time series data. Specifically, our goal is to estimate the principal component of time series data with respect to the covariance matrix of the stationary distribution. Computationally, we propose a variant of Oja's algorithm combined with downsampling to control the bias of the stochastic gradient caused by the data dependency. Theoretically, we quantify the uncertainty of our proposed stochastic algorithm based on diffusion approximations. This allows us to prove the asymptotic rate of convergence and further implies near-optimal asymptotic sample complexity. Numerical experiments are provided to support our analysis.", "bibtex": "@inproceedings{NEURIPS2018_d2541440,\n author = {Chen, Minshuo and Yang, Lin and Wang, Mengdi and Zhao, Tuo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dimensionality Reduction for Stationary Time Series via Stochastic Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d25414405eb37dae1c14b18d6a2cac34-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d25414405eb37dae1c14b18d6a2cac34-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d25414405eb37dae1c14b18d6a2cac34-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d25414405eb37dae1c14b18d6a2cac34-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d25414405eb37dae1c14b18d6a2cac34-Reviews.html", "metareview": "", "pdf_size": 1021110, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9708026179247253025&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Georgia Institute of Technology; Princeton University; Princeton University; Georgia Institute of Technology", "aff_domain": "gatech.edu;princeton.edu;princeton.edu;gatech.edu", "email": "gatech.edu;princeton.edu;princeton.edu;gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d25414405eb37dae1c14b18d6a2cac34-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.princeton.edu", "aff_unique_abbr": "Georgia Tech;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dimensionality Reduction has Quantifiable Imperfections: Two Geometric Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11808", "id": "11808", "author_site": "Yik Chau (Kry) Lui, Gavin Weiguang Ding, Ruitong Huang, Robert McCann", "author": "Kry Lui; Gavin Weiguang Ding; Ruitong Huang; Robert McCann", "abstract": "In this paper, we investigate dimensionality reduction (DR) maps in an information retrieval setting from a quantitative topology point of view. In particular, we show that no DR maps can achieve perfect precision and perfect recall simultaneously. Thus a continuous DR map must have imperfect precision. We further prove an upper bound on the precision of Lipschitz continuous DR maps. While precision is a natural measure in an information retrieval setting, it does not measure `how' wrong the retrieved data is. We therefore propose a new measure based on Wasserstein distance that comes with a similar theoretical guarantee. A key technical step in our proofs is a particular optimization problem of the $L_2$-Wasserstein distance over a constrained set of distributions. We provide a complete solution to this optimization problem, which can be of independent interest on the technical side.", "bibtex": "@inproceedings{NEURIPS2018_037a595e,\n author = {Lui, Kry and Ding, Gavin Weiguang and Huang, Ruitong and McCann, Robert},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dimensionality Reduction has Quantifiable Imperfections: Two Geometric Bounds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/037a595e6f4f0576a9efe43154d71c18-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/037a595e6f4f0576a9efe43154d71c18-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/037a595e6f4f0576a9efe43154d71c18-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/037a595e6f4f0576a9efe43154d71c18-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/037a595e6f4f0576a9efe43154d71c18-Reviews.html", "metareview": "", "pdf_size": 602857, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12061323098286695931&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Borealis AI; Borealis AI; Borealis AI; Department of Mathematics+University of Toronto", "aff_domain": "borealisai.com;borealisai.com;borealisai.com;math.toronto.edu", "email": "borealisai.com;borealisai.com;borealisai.com;math.toronto.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/037a595e6f4f0576a9efe43154d71c18-Abstract.html", "aff_unique_index": "0;0;0;1+2", "aff_unique_norm": "Borealis AI;Mathematics Department;University of Toronto", "aff_unique_dep": ";Department of Mathematics;", "aff_unique_url": "https://www.borealisai.com;;https://www.utoronto.ca", "aff_unique_abbr": "Borealis AI;;U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada;" }, { "title": "Dimensionally Tight Bounds for Second-Order Hamiltonian Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11585", "id": "11585", "author_site": "Oren Mangoubi, Nisheeth Vishnoi", "author": "Oren Mangoubi; Nisheeth Vishnoi", "abstract": "Hamiltonian Monte Carlo (HMC) is a widely deployed method to sample from high-dimensional distributions in Statistics and Machine learning. HMC is known to run very efficiently in practice and its popular second-order ``leapfrog\" implementation has long been conjectured to run in $d^{1/4}$ gradient evaluations. Here we show that this conjecture is true when sampling from strongly log-concave target distributions that satisfy a weak third-order regularity property associated with the input data. Our regularity condition is weaker than the Lipschitz Hessian property and allows us to show faster convergence bounds for a much larger class of distributions than would be possible with the usual Lipschitz Hessian constant alone. Important distributions that satisfy our regularity condition include posterior distributions used in Bayesian logistic regression for which the data satisfies an ``incoherence\" property. Our result compares favorably with the best available bounds for the class of strongly log-concave distributions, which grow like $d^{{1}/{2}}$ gradient evaluations with the dimension. 
Moreover, our simulations on synthetic data suggest that, when our regularity condition is satisfied, leapfrog HMC performs better than its competitors -- both in terms of accuracy and in terms of the number of gradient evaluations it requires.", "bibtex": "@inproceedings{NEURIPS2018_e07bceab,\n author = {Mangoubi, Oren and Vishnoi, Nisheeth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dimensionally Tight Bounds for Second-Order Hamiltonian Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e07bceab69529b0f0b43625953fbf2a0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e07bceab69529b0f0b43625953fbf2a0-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e07bceab69529b0f0b43625953fbf2a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e07bceab69529b0f0b43625953fbf2a0-Reviews.html", "metareview": "", "pdf_size": 480435, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=65787037374411642&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "EPFL; EPFL", "aff_domain": "gmail.com;gmail.com", "email": "gmail.com;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e07bceab69529b0f0b43625953fbf2a0-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Diminishing Returns Shape Constraints for Interpretability and Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11659", "id": "11659", "author_site": "Maya Gupta, Dara Bahri, Andrew Cotter, Kevin Canini", "author": "Maya Gupta; Dara Bahri; Andrew Cotter; Kevin Canini", "abstract": "We investigate machine learning models that can provide diminishing returns and accelerating returns guarantees to capture prior knowledge or policies about how outputs should depend on inputs. We show that one can build flexible, nonlinear, multi-dimensional models using lattice functions with any combination of concavity/convexity and monotonicity constraints on any subsets of features, and compare to new shape-constrained neural networks. We demonstrate on real-world examples that these shape constrained models can provide tuning-free regularization and improve model understandability.", "bibtex": "@inproceedings{NEURIPS2018_caa20203,\n author = {Gupta, Maya and Bahri, Dara and Cotter, Andrew and Canini, Kevin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Diminishing Returns Shape Constraints for Interpretability and Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/caa202034f268232c26fac9435f54e15-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/caa202034f268232c26fac9435f54e15-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/caa202034f268232c26fac9435f54e15-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/caa202034f268232c26fac9435f54e15-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/caa202034f268232c26fac9435f54e15-Reviews.html", "metareview": "", "pdf_size": 538625, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10412683205308580649&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/caa202034f268232c26fac9435f54e15-Abstract.html" }, { "title": "Direct Estimation of Differences in Causal Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11376", "id": "11376", "author_site": "Yuhao Wang, Chandler Squires, Anastasiya Belyaeva, Caroline Uhler", "author": "Yuhao Wang; Chandler Squires; Anastasiya Belyaeva; Caroline Uhler", "abstract": "We consider the problem of estimating the differences between two causal directed acyclic graph (DAG) models with a shared topological order given i.i.d. samples from each model. This is of interest for example in genomics, where changes in the structure or edge weights of the underlying causal graphs reflect alterations in the gene regulatory networks. We here provide the first provably consistent method for directly estimating the differences in a pair of causal DAGs without separately learning two possibly large and dense DAG models and computing their difference. Our two-step algorithm first uses invariance tests between regression coefficients of the two data sets to estimate the skeleton of the difference graph and then orients some of the edges using invariance tests between regression residual variances. We demonstrate the properties of our method through a simulation study and apply it to the analysis of gene expression data from ovarian cancer and during T-cell activation.", "bibtex": "@inproceedings{NEURIPS2018_e1314fc0,\n author = {Wang, Yuhao and Squires, Chandler and Belyaeva, Anastasiya and Uhler, Caroline},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Direct Estimation of Differences in Causal Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e1314fc026da60d837353d20aefaf054-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e1314fc026da60d837353d20aefaf054-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e1314fc026da60d837353d20aefaf054-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e1314fc026da60d837353d20aefaf054-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e1314fc026da60d837353d20aefaf054-Reviews.html", "metareview": "", "pdf_size": 743907, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6891353891081698977&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Lab for Information & Decision Systems and Institute for Data, Systems and Society, Massachusetts Institute of Technology; Lab for Information & Decision Systems and Institute for Data, Systems and Society, Massachusetts Institute of Technology; Lab for Information & Decision Systems and Institute for Data, Systems and Society, Massachusetts Institute of Technology; Lab for Information & Decision Systems and Institute for Data, Systems and Society, Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e1314fc026da60d837353d20aefaf054-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Lab for Information & Decision Systems, Institute for Data, Systems and Society", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Direct Runge-Kutta Discretization Achieves Acceleration", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11388", "id": "11388", "author_site": "Jingzhao Zhang, Aryan Mokhtari, Suvrit Sra, Ali Jadbabaie", "author": "Jingzhao Zhang; Aryan Mokhtari; Suvrit Sra; Ali Jadbabaie", "abstract": "We study gradient-based optimization methods obtained by directly discretizing a second-order ordinary differential equation (ODE) related to the continuous limit of Nesterov's accelerated gradient method. When the function is smooth enough, we show that acceleration can be achieved by a stable discretization of this ODE using standard Runge-Kutta integrators. Specifically, we prove that under Lipschitz-gradient, convexity and order-$(s+2)$ differentiability assumptions, the sequence of iterates generated by discretizing the proposed second-order ODE converges to the optimal solution at a rate of $\\mathcal{O}({N^{-2\\frac{s}{s+1}}})$, where $s$ is the order of the Runge-Kutta numerical integrator. Furthermore, we introduce a new local flatness condition on the objective, under which rates even faster than $\\mathcal{O}(N^{-2})$ can be achieved with low-order integrators and only gradient information. Notably, this flatness condition is satisfied by several standard loss functions used in machine learning. 
We provide numerical experiments that verify the theoretical rates predicted by our results.", "bibtex": "@inproceedings{NEURIPS2018_44968aec,\n author = {Zhang, Jingzhao and Mokhtari, Aryan and Sra, Suvrit and Jadbabaie, Ali},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Direct Runge-Kutta Discretization Achieves Acceleration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/44968aece94f667e4095002d140b5896-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/44968aece94f667e4095002d140b5896-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/44968aece94f667e4095002d140b5896-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/44968aece94f667e4095002d140b5896-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/44968aece94f667e4095002d140b5896-Reviews.html", "metareview": "", "pdf_size": 607393, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9294010625269728824&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "LIDS, Massachusetts Institute of Technology, Cambridge, MA, 02139; LIDS, Massachusetts Institute of Technology, Cambridge, MA, 02139; LIDS, IDSS, Massachusetts Institute of Technology, Cambridge, MA, 02139; LIDS, IDSS, Massachusetts Institute of Technology, Cambridge, MA, 02139", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/44968aece94f667e4095002d140b5896-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "LIDS", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dirichlet belief networks for topic structure learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11763", "id": "11763", "author_site": "He Zhao, Lan Du, Wray Buntine, Mingyuan Zhou", "author": "He Zhao; Lan Du; Wray Buntine; Mingyuan Zhou", "abstract": "Recently, considerable research effort has been devoted to developing deep architectures for topic models to learn topic structures. Although several deep models have been proposed to learn better topic proportions of documents, how to leverage the benefits of deep structures for learning word distributions of topics has not yet been rigorously studied. Here we propose a new multi-layer generative process on word distributions of topics, where each layer consists of a set of topics and each topic is drawn from a mixture of the topics of the layer above. As the topics in all layers can be directly interpreted by words, the proposed model is able to discover interpretable topic hierarchies. As a self-contained module, our model can be flexibly adapted to different kinds of topic models to improve their modelling accuracy and interpretability. 
Extensive experiments on text corpora demonstrate the advantages of the proposed model.", "bibtex": "@inproceedings{NEURIPS2018_eaae5e04,\n author = {Zhao, He and Du, Lan and Buntine, Wray and Zhou, Mingyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dirichlet belief networks for topic structure learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/eaae5e04a259d09af85c108fe4d7dd0c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/eaae5e04a259d09af85c108fe4d7dd0c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/eaae5e04a259d09af85c108fe4d7dd0c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/eaae5e04a259d09af85c108fe4d7dd0c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/eaae5e04a259d09af85c108fe4d7dd0c-Reviews.html", "metareview": "", "pdf_size": 518090, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13908644537239897303&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/eaae5e04a259d09af85c108fe4d7dd0c-Abstract.html" }, { "title": "Dirichlet-based Gaussian Processes for Large-scale Calibrated Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11583", "id": "11583", "author_site": "Dimitrios Milios, Raffaello Camoriano, Pietro Michiardi, Lorenzo Rosasco, Maurizio Filippone", "author": "Dimitrios Milios; Raffaello Camoriano; Pietro Michiardi; Lorenzo Rosasco; Maurizio Filippone", "abstract": "This paper studies the problem of deriving fast and accurate classification algorithms with uncertainty quantification. Gaussian process classification provides a principled approach, but the corresponding computational burden is hardly sustainable in large-scale problems and devising efficient alternatives is a challenge. In this work, we investigate if and how Gaussian process regression directly applied to classification labels can be used to tackle this question. While in this case training is remarkably faster, predictions need to be calibrated for classification and uncertainty estimation. To this aim, we propose a novel regression approach where the labels are obtained through the interpretation of classification labels as the coefficients of a degenerate Dirichlet distribution. Extensive experimental results show that the proposed approach provides essentially the same accuracy and uncertainty quantification as Gaussian process classification while requiring only a fraction of computational resources.", "bibtex": "@inproceedings{NEURIPS2018_b6617980,\n author = {Milios, Dimitrios and Camoriano, Raffaello and Michiardi, Pietro and Rosasco, Lorenzo and Filippone, Maurizio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dirichlet-based Gaussian Processes for Large-scale Calibrated Classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b6617980ce90f637e68c3ebe8b9be745-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b6617980ce90f637e68c3ebe8b9be745-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b6617980ce90f637e68c3ebe8b9be745-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b6617980ce90f637e68c3ebe8b9be745-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b6617980ce90f637e68c3ebe8b9be745-Reviews.html", "metareview": "", "pdf_size": 782416, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7488422957804807823&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "EURECOM, Sophia Antipolis, France; LCSL, IIT (Italy) & MIT (USA); EURECOM, Sophia Antipolis, France; DIBRIS - Universit\u00e0 degli Studi di Genova, Italy + LCSL - IIT (Italy) & MIT (USA); EURECOM, Sophia Antipolis, France", "aff_domain": "eurecom.fr;iit.it;eurecom.fr;mit.edu;eurecom.fr", "email": "eurecom.fr;iit.it;eurecom.fr;mit.edu;eurecom.fr", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b6617980ce90f637e68c3ebe8b9be745-Abstract.html", "aff_unique_index": "0;1;0;2+3;0", "aff_unique_norm": "EURECOM;IIT;Universit\u00e0 degli Studi di Genova;Istituto Italiano di Tecnologia", "aff_unique_dep": ";LCSL;DIBRIS;LCSL", "aff_unique_url": "https://www.eurecom.fr;;https://www.unige.it;https://www.iit.it", "aff_unique_abbr": ";IIT;;IIT", "aff_campus_unique_index": "0;0;;0", "aff_campus_unique": "Sophia Antipolis;", "aff_country_unique_index": "0;1;0;1+1;0", "aff_country_unique": "France;Italy" }, { "title": "Disconnected Manifold Learning for Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11707", "id": "11707", "author_site": "Mahyar Khayatkhoei, Maneesh Singh, Ahmed Elgammal", "author": "Mahyar Khayatkhoei; Maneesh K. Singh; Ahmed Elgammal", "abstract": "Natural images may lie on a union of disjoint manifolds rather than one globally connected manifold, and this can cause several difficulties for the training of common Generative Adversarial Networks (GANs). In this work, we first show that single generator GANs are unable to correctly model a distribution supported on a disconnected manifold, and investigate how sample quality, mode dropping and local convergence are affected by this. Next, we show how using a collection of generators can address this problem, providing new insights into the success of such multi-generator GANs. Finally, we explain the serious issues caused by considering a fixed prior over the collection of generators and propose a novel approach for learning the prior and inferring the necessary number of generators without any supervision. Our proposed modifications can be applied on top of any other GAN model to enable learning of distributions supported on disconnected manifolds. We conduct several experiments to illustrate the aforementioned shortcoming of GANs, its consequences in practice, and the effectiveness of our proposed modifications in alleviating these issues.", "bibtex": "@inproceedings{NEURIPS2018_2b346a0a,\n author = {Khayatkhoei, Mahyar and Singh, Maneesh K. 
and Elgammal, Ahmed},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Disconnected Manifold Learning for Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2b346a0aa375a07f5a90a344a61416c4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2b346a0aa375a07f5a90a344a61416c4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2b346a0aa375a07f5a90a344a61416c4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2b346a0aa375a07f5a90a344a61416c4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2b346a0aa375a07f5a90a344a61416c4-Reviews.html", "metareview": "", "pdf_size": 3854647, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1483329680707180857&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, Rutgers University; Department of Computer Science, Rutgers University; Verisk Analytics", "aff_domain": "cs.rutgers.edu;cs.rutgers.edu;verisk.com", "email": "cs.rutgers.edu;cs.rutgers.edu;verisk.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2b346a0aa375a07f5a90a344a61416c4-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Rutgers University;Verisk Analytics", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.rutgers.edu;https://www.verisk.com", "aff_unique_abbr": "Rutgers;Verisk", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Discovery of Latent 3D Keypoints via End-to-end Geometric Reasoning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11218", "id": "11218", "author_site": "Supasorn Suwajanakorn, Noah Snavely, Jonathan Tompson, Mohammad Norouzi", "author": "Supasorn Suwajanakorn; Noah Snavely; Jonathan J Tompson; Mohammad Norouzi", "abstract": "This paper presents KeypointNet, an end-to-end geometric reasoning framework to learn an optimal set of category-specific keypoints, along with their detectors to predict 3D keypoints in a single 2D input image. We demonstrate this framework on 3D pose estimation task by proposing a differentiable pose objective that seeks the optimal set of keypoints for recovering the relative pose between two views of an object. Our network automatically discovers a consistent set of keypoints across viewpoints of a single object as well as across all object instances of a given object class. Importantly, we find that our end-to-end approach using no ground-truth keypoint annotations outperforms a fully supervised baseline using the same neural network architecture for the pose estimation task. \nThe discovered 3D keypoints across the car, chair, and plane\ncategories of ShapeNet are visualized at https://keypoints.github.io/", "bibtex": "@inproceedings{NEURIPS2018_24146db4,\n author = {Suwajanakorn, Supasorn and Snavely, Noah and Tompson, Jonathan J and Norouzi, Mohammad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Discovery of Latent 3D Keypoints via End-to-end Geometric Reasoning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/24146db4eb48c718b84cae0a0799dcfc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/24146db4eb48c718b84cae0a0799dcfc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/24146db4eb48c718b84cae0a0799dcfc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/24146db4eb48c718b84cae0a0799dcfc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/24146db4eb48c718b84cae0a0799dcfc-Reviews.html", "metareview": "", "pdf_size": 1307592, "gs_citation": 288, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2478389451024960403&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Vidyasirimedhi Institute of Science and Technology; Google AI; Google AI; Google AI", "aff_domain": "vistec.ac.th;google.com;google.com;google.com", "email": "vistec.ac.th;google.com;google.com;google.com", "github": "keypointnet.github.io", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/24146db4eb48c718b84cae0a0799dcfc-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Vidyasirimedhi Institute of Science and Technology;Google", "aff_unique_dep": ";Google AI", "aff_unique_url": "https://www.vistec.ac.th;https://ai.google", "aff_unique_abbr": "VISTEC;Google AI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Thailand;United States" }, { "title": "Discretely Relaxing Continuous Variables for tractable Variational Inference", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11990", "id": "11990", "author_site": "Trefor Evans, Prasanth Nair", "author": "Trefor Evans; Prasanth Nair", "abstract": "We explore a new research direction in Bayesian variational inference with discrete latent variable priors where we exploit Kronecker matrix algebra for efficient and exact computations of the evidence lower bound (ELBO). The proposed \"DIRECT\" approach has several advantages over its predecessors; (i) it can exactly compute ELBO gradients (i.e. unbiased, zero-variance gradient estimates), eliminating the need for high-variance stochastic gradient estimators and enabling the use of quasi-Newton optimization methods; (ii) its training complexity is independent of the number of training points, permitting inference on large datasets; and (iii) its posterior samples consist of sparse and low-precision quantized integers which permit fast inference on hardware limited devices. In addition, our DIRECT models can exactly compute statistical moments of the parameterized predictive posterior without relying on Monte Carlo sampling. The DIRECT approach is not practical for all likelihoods, however, we identify a popular model structure which is practical, and demonstrate accurate inference using latent variables discretized as extremely low-precision 4-bit quantized integers. 
While the ELBO computations considered in the numerical studies require over 10^2352 log-likelihood evaluations, we train on datasets with over two-million points in just seconds.", "bibtex": "@inproceedings{NEURIPS2018_7790583c,\n author = {Evans, Trefor and Nair, Prasanth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Discretely Relaxing Continuous Variables for tractable Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7790583c0d8d74e930a4441ad75ebc64-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7790583c0d8d74e930a4441ad75ebc64-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7790583c0d8d74e930a4441ad75ebc64-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7790583c0d8d74e930a4441ad75ebc64-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7790583c0d8d74e930a4441ad75ebc64-Reviews.html", "metareview": "", "pdf_size": 365266, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2800829415801213733&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "University of Toronto; University of Toronto", "aff_domain": "mail.utoronto.ca;utias.utoronto.ca", "email": "mail.utoronto.ca;utias.utoronto.ca", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7790583c0d8d74e930a4441ad75ebc64-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Discrimination-aware Channel Pruning for Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11109", "id": "11109", "author_site": "Zhuangwei Zhuang, Mingkui Tan, Bohan Zhuang, Jing Liu, Yong Guo, Qingyao Wu, Junzhou Huang, Jinhui Zhu", "author": "Zhuangwei Zhuang; Mingkui Tan; Bohan Zhuang; Jing Liu; Yong Guo; Qingyao Wu; Junzhou Huang; Jinhui Zhu", "abstract": "Channel pruning is one of the predominant approaches for deep model compression. Existing pruning methods either train from scratch with sparsity constraints on channels, or minimize the reconstruction error between the pre-trained feature maps and the compressed ones. Both strategies suffer from some limitations: the former kind is computationally expensive and difficult to converge, whilst the latter kind optimizes the reconstruction error but ignores the discriminative power of channels. To overcome these drawbacks, we investigate a simple-yet-effective method, called discrimination-aware channel pruning, to choose those channels that really contribute to discriminative power. To this end, we introduce additional losses into the network to increase the discriminative power of intermediate layers and then select the most discriminative channels for each layer by considering the additional loss and the reconstruction error. Last, we propose a greedy algorithm to conduct channel selection and parameter optimization in an iterative way. Extensive experiments demonstrate the effectiveness of our method. 
For example, on ILSVRC-12, our pruned ResNet-50 with 30% reduction of channels even outperforms the original model by 0.39% in top-1 accuracy.", "bibtex": "@inproceedings{NEURIPS2018_55a7cf9c,\n author = {Zhuang, Zhuangwei and Tan, Mingkui and Zhuang, Bohan and Liu, Jing and Guo, Yong and Wu, Qingyao and Huang, Junzhou and Zhu, Jinhui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Discrimination-aware Channel Pruning for Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/55a7cf9c71f1c9c495413f934dd1a158-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/55a7cf9c71f1c9c495413f934dd1a158-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/55a7cf9c71f1c9c495413f934dd1a158-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/55a7cf9c71f1c9c495413f934dd1a158-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/55a7cf9c71f1c9c495413f934dd1a158-Reviews.html", "metareview": "", "pdf_size": 706603, "gs_citation": 791, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4423411645597495&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "South China University of Technology; South China University of Technology+Tencent AI Lab; The University of Adelaide; South China University of Technology; South China University of Technology; South China University of Technology; University of Texas at Arlington+Tencent AI Lab; South China University of Technology+Tencent AI Lab", "aff_domain": "mail.scut.edu.cn;scut.edu.cn;adelaide.edu.au;mail.scut.edu.cn;mail.scut.edu.cn;scut.edu.cn;uta.edu;scut.edu.cn", "email": "mail.scut.edu.cn;scut.edu.cn;adelaide.edu.au;mail.scut.edu.cn;mail.scut.edu.cn;scut.edu.cn;uta.edu;scut.edu.cn", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/55a7cf9c71f1c9c495413f934dd1a158-Abstract.html", "aff_unique_index": "0;0+1;2;0;0;0;3+1;0+1", "aff_unique_norm": "South China University of Technology;Tencent;University of Adelaide;University of Texas at Arlington", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "https://www.scut.edu.cn;https://ai.tencent.com;https://www.adelaide.edu.au;https://www.uta.edu", "aff_unique_abbr": "SCUT;Tencent AI Lab;Adelaide;UTA", "aff_campus_unique_index": ";1;", "aff_campus_unique": ";Arlington", "aff_country_unique_index": "0;0+0;1;0;0;0;2+0;0+0", "aff_country_unique": "China;Australia;United States" }, { "title": "Distilled Wasserstein Learning for Word Embedding and Topic Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11185", "id": "11185", "author_site": "Hongteng Xu, Wenlin Wang, Wei Liu, Lawrence Carin", "author": "Hongteng Xu; Wenlin Wang; Wei Liu; Lawrence Carin", "abstract": "We propose a novel Wasserstein method with a distillation mechanism, yielding joint learning of word embeddings and topics. \nThe proposed method is based on the fact that the Euclidean distance between word embeddings may be employed as the underlying distance in the Wasserstein topic model. \nThe word distributions of topics, their optimal transport to the word distributions of documents, and the embeddings of words are learned in a unified framework. 
\nWhen learning the topic model, we leverage a distilled ground-distance matrix to update the topic distributions and smoothly calculate the corresponding optimal transports. \nSuch a strategy provides the updating of word embeddings with robust guidance, improving algorithm convergence. \nAs an application, we focus on patient admission records, in which the proposed method embeds the codes of diseases and procedures and learns the topics of admissions, obtaining superior performance on clinically-meaningful disease network construction, mortality prediction as a function of admission codes, and procedure recommendation.", "bibtex": "@inproceedings{NEURIPS2018_22fb0cee,\n author = {Xu, Hongteng and Wang, Wenlin and Liu, Wei and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distilled Wasserstein Learning for Word Embedding and Topic Modeling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/22fb0cee7e1f3bde58293de743871417-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/22fb0cee7e1f3bde58293de743871417-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/22fb0cee7e1f3bde58293de743871417-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/22fb0cee7e1f3bde58293de743871417-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/22fb0cee7e1f3bde58293de743871417-Reviews.html", "metareview": "", "pdf_size": 451663, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4146602806717396539&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Infinia ML, Inc.+Duke University; Duke University; Tencent AI Lab; Duke University", "aff_domain": "infiniaml.com; ; ; ", "email": "infiniaml.com; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/22fb0cee7e1f3bde58293de743871417-Abstract.html", "aff_unique_index": "0+1;1;2;1", "aff_unique_norm": "Infinia ML;Duke University;Tencent", "aff_unique_dep": "Infinia ML;;Tencent AI Lab", "aff_unique_url": "https://www.infinia-ml.com;https://www.duke.edu;https://ai.tencent.com", "aff_unique_abbr": "Infinia ML;Duke;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Distributed $k$-Clustering for Data with Heavy Noise", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11752", "id": "11752", "author_site": "Shi Li, Xiangyu Guo", "author": "Shi Li; Xiangyu Guo", "abstract": "In this paper, we consider the $k$-center/median/means clustering with outliers problems (or the $(k, z)$-center/median/means problems) in the distributed setting. Most previous distributed algorithms have their communication costs linearly depending on $z$, the number of outliers. Recently Guha et al.[10] overcame this dependence issue by considering bi-criteria approximation algorithms that output solutions with $2z$ outliers. For the case where $z$ is large, the extra $z$ outliers discarded by the algorithms might be too large, considering that the data gathering process might be costly. 
In this paper, we improve the number of outliers to the best possible $(1+\\epsilon)z$, while maintaining the $O(1)$-approximation ratio and independence of communication cost on $z$. The problems we consider include the $(k, z)$-center problem and the $(k, z)$-median/means problems in Euclidean metrics. Implementation of our algorithm for $(k, z)$-center shows that it outperforms many previous algorithms, both in terms of the communication cost and the quality of the output solution.", "bibtex": "@inproceedings{NEURIPS2018_2fe5a27c,\n author = {Li, Shi and Guo, Xiangyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed k-Clustering for Data with Heavy Noise},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2fe5a27cde066c0b65acb8f2c1717464-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2fe5a27cde066c0b65acb8f2c1717464-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2fe5a27cde066c0b65acb8f2c1717464-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2fe5a27cde066c0b65acb8f2c1717464-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2fe5a27cde066c0b65acb8f2c1717464-Reviews.html", "metareview": "", "pdf_size": 446875, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4052545958640287143&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University at Buffalo; University at Buffalo", "aff_domain": "buffalo.edu;buffalo.edu", "email": "buffalo.edu;buffalo.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2fe5a27cde066c0b65acb8f2c1717464-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University at Buffalo", "aff_unique_dep": "", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "UB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Distributed Learning without Distress: Privacy-Preserving Empirical Risk Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11614", "id": "11614", "author_site": "Bargav Jayaraman, Lingxiao Wang, David Evans, Quanquan Gu", "author": "Bargav Jayaraman; Lingxiao Wang; David Evans; Quanquan Gu", "abstract": "Distributed learning allows a group of independent data owners to collaboratively learn a model over their data sets without exposing their private data. We present a distributed learning approach that combines differential privacy with secure multi-party computation. We explore two popular methods of differential privacy, output perturbation and gradient perturbation, and advance the state-of-the-art for both methods in the distributed learning setting. In our output perturbation method, the parties combine local models within a secure computation and then add the required differential privacy noise before revealing the model. In our gradient perturbation method, the data owners collaboratively train a global model via an iterative learning algorithm. At each iteration, the parties aggregate their local gradients within a secure computation, adding sufficient noise to ensure privacy before the gradient updates are revealed. 
For both methods, we show that the noise can be reduced in the multi-party setting by adding the noise inside the secure computation after aggregation, asymptotically improving upon the best previous results. Experiments on real-world data sets demonstrate that our methods provide substantial utility gains for typical privacy requirements.", "bibtex": "@inproceedings{NEURIPS2018_7221e5c8,\n author = {Jayaraman, Bargav and Wang, Lingxiao and Evans, David and Gu, Quanquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Learning without Distress: Privacy-Preserving Empirical Risk Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Reviews.html", "metareview": "", "pdf_size": 353778, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10577380829443665980&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, University of Virginia; Department of Computer Science, University of California, Los Angeles; Department of Computer Science, University of Virginia; Department of Computer Science, University of California, Los Angeles", "aff_domain": "virginia.edu;cs.ucla.edu;virginia.edu;cs.ucla.edu", "email": "virginia.edu;cs.ucla.edu;virginia.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7221e5c8ec6b08ef6d3f9ff3ce6eb1d1-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Virginia;University of California, Los Angeles", "aff_unique_dep": "Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.virginia.edu;https://www.ucla.edu", "aff_unique_abbr": "UVA;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distributed Multi-Player Bandits - a Game of Thrones Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11695", "id": "11695", "author_site": "Ilai Bistritz, Amir Leshem", "author": "Ilai Bistritz; Amir Leshem", "abstract": "We consider a multi-armed bandit game where $N$ players compete for $K$ arms for $T$ turns. Each player has different expected rewards for the arms, and the instantaneous rewards are independent and identically distributed. Performance is measured using the expected sum of regrets, compared to the optimal assignment of arms to players. We assume that each player only knows her actions and the reward she received each turn. Players cannot observe the actions of other players, and no communication between players is possible. We present a distributed algorithm and prove that it achieves an expected sum of regrets of near-$O\\left(\\log^{2}T\\right)$. 
This is the first algorithm to achieve a poly-logarithmic regret in this fully distributed scenario. All other works have assumed that either all players have the same vector of expected rewards or that communication between players is possible.", "bibtex": "@inproceedings{NEURIPS2018_c2964caa,\n author = {Bistritz, Ilai and Leshem, Amir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Multi-Player Bandits - a Game of Thrones Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c2964caac096f26db222cb325aa267cb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c2964caac096f26db222cb325aa267cb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c2964caac096f26db222cb325aa267cb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c2964caac096f26db222cb325aa267cb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c2964caac096f26db222cb325aa267cb-Reviews.html", "metareview": "", "pdf_size": 430413, "gs_citation": 174, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=636361646802622019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Stanford University; Bar Ilan University", "aff_domain": "stanford.edu;biu.ac.il", "email": "stanford.edu;biu.ac.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c2964caac096f26db222cb325aa267cb-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Bar-Ilan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.biu.ac.il", "aff_unique_abbr": "Stanford;BIU", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "title": "Distributed Multitask Reinforcement Learning with Quadratic Convergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11850", "id": "11850", "author_site": "Rasul Tutunov, Dongho Kim, Haitham Bou Ammar", "author": "Rasul Tutunov; Dongho Kim; Haitham Bou Ammar", "abstract": "Multitask reinforcement learning (MTRL) suffers from scalability issues when the number of tasks or trajectories grows large. The main reason behind this drawback is the reliance on centralised solutions. Recent methods exploited the connection between MTRL and general consensus to propose scalable solutions. These methods, however, suffer from two drawbacks. First, they rely on predefined objectives, and, second, exhibit linear convergence guarantees. In this paper, we improve over the state of the art by deriving multitask reinforcement learning from a variational inference perspective. We then propose a novel distributed solver for MTRL with quadratic convergence guarantees.", "bibtex": "@inproceedings{NEURIPS2018_8073bd4e,\n author = {Tutunov, Rasul and Kim, Dongho and Bou Ammar, Haitham},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Multitask Reinforcement Learning with Quadratic Convergence},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8073bd4ed0fe0c330290c58056a2cd5e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8073bd4ed0fe0c330290c58056a2cd5e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8073bd4ed0fe0c330290c58056a2cd5e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8073bd4ed0fe0c330290c58056a2cd5e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8073bd4ed0fe0c330290c58056a2cd5e-Reviews.html", "metareview": "", "pdf_size": 2704956, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13278287462613537475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "PROWLER.io, Cambridge, United Kingdom; PROWLER.io, Cambridge, United Kingdom; PROWLER.io, Cambridge, United Kingdom", "aff_domain": "prowler.io;prowler.io;prowler.io", "email": "prowler.io;prowler.io;prowler.io", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8073bd4ed0fe0c330290c58056a2cd5e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "PROWLER.io", "aff_unique_dep": "", "aff_unique_url": "https://prowler.io", "aff_unique_abbr": "", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Distributed Stochastic Optimization via Adaptive SGD", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11203", "id": "11203", "author_site": "Ashok Cutkosky, R\u00f3bert Busa-Fekete", "author": "Ashok Cutkosky; R\u00f3bert Busa-Fekete", "abstract": "Stochastic convex optimization algorithms are the most popular way to train machine learning models on large-scale data. Scaling up the training process of these models is crucial, but the most popular algorithm, Stochastic Gradient Descent (SGD), is a serial method that is surprisingly hard to parallelize. In this paper, we propose an efficient distributed stochastic optimization method by combining adaptivity with variance reduction techniques. Our analysis yields a linear speedup in the number of machines, constant memory footprint, and only a logarithmic number of communication rounds. Critically, our approach is a black-box reduction that parallelizes any serial online learning algorithm, streamlining prior analysis and allowing us to leverage the significant progress that has been made in designing adaptive algorithms. In particular, we achieve optimal convergence rates without any prior knowledge of smoothness parameters, yielding a more robust algorithm that reduces the need for hyperparameter tuning. We implement our algorithm in the Spark distributed framework and exhibit dramatic performance gains on large-scale logistic regression problems.", "bibtex": "@inproceedings{NEURIPS2018_5c936263,\n author = {Cutkosky, Ashok and Busa-Fekete, R\\'{o}bert},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Stochastic Optimization via Adaptive SGD},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5c936263f3428a40227908d5a3847c0b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5c936263f3428a40227908d5a3847c0b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5c936263f3428a40227908d5a3847c0b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5c936263f3428a40227908d5a3847c0b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5c936263f3428a40227908d5a3847c0b-Reviews.html", "metareview": "", "pdf_size": 895056, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9794817984535533861&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University, USA; Yahoo! Research, New York, USA", "aff_domain": "google.com;oath.com", "email": "google.com;oath.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5c936263f3428a40227908d5a3847c0b-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;Yahoo! Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://research.yahoo.com", "aff_unique_abbr": "Stanford;Yahoo! Res", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Distributed Weight Consolidation: A Brain Segmentation Case Study", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11406", "id": "11406", "author_site": "Patrick McClure, Charles Zheng, Jakub R Kaczmarzyk, John Rogers-Lee, Satra Ghosh, Dylan Nielson, Peter A Bandettini, Francisco Pereira", "author": "Patrick McClure; Charles Y Zheng; Jakub Kaczmarzyk; John Rogers-Lee; Satra Ghosh; Dylan Nielson; Peter A Bandettini; Francisco Pereira", "abstract": "Collecting the large datasets needed to train deep neural networks can be very difficult, particularly for the many applications for which sharing and pooling data is complicated by practical, ethical, or legal concerns. However, it may be the case that derivative datasets or predictive models developed within individual sites can be shared and combined with fewer restrictions. Training on distributed data and combining the resulting networks is often viewed as continual learning, but these methods require networks to be trained sequentially. In this paper, we introduce distributed weight consolidation (DWC), a continual learning method to consolidate the weights of separate neural networks, each trained on an independent dataset. We evaluated DWC with a brain segmentation case study, where we consolidated dilated convolutional neural networks trained on independent structural magnetic resonance imaging (sMRI) datasets from different sites. 
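The abstract does not spell out the consolidation rule, but one natural reading is that each site ships a diagonal Gaussian (variational) posterior over a shared set of weights, and these posteriors are combined by a precision-weighted product. The sketch below is only that reading, under stated assumptions; the function name and interface are illustrative, not the paper's code.

```python
# Hedged sketch: combine per-site diagonal Gaussian weight posteriors
# (mu_i, var_i) into one consolidated posterior via a precision-weighted
# product. One plausible instance of "weight consolidation", not
# necessarily the exact rule used in the DWC paper.
import numpy as np

def consolidate(mus, variances):
    """mus, variances: lists of per-site arrays of identical shape."""
    precisions = [1.0 / v for v in variances]
    total_precision = np.sum(precisions, axis=0)
    mu = np.sum([p * m for p, m in zip(precisions, mus)], axis=0) / total_precision
    return mu, 1.0 / total_precision  # consolidated mean and variance
```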
We found that DWC led to increased performance on test sets from the different sites, while maintaining generalization performance for a very large and completely independent multi-site dataset, compared to an ensemble baseline.", "bibtex": "@inproceedings{NEURIPS2018_093b60fd,\n author = {McClure, Patrick and Zheng, Charles Y and Kaczmarzyk, Jakub and Rogers-Lee, John and Ghosh, Satra and Nielson, Dylan and Bandettini, Peter A and Pereira, Francisco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Weight Consolidation: A Brain Segmentation Case Study},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/093b60fd0557804c8ba0cbf1453da22f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/093b60fd0557804c8ba0cbf1453da22f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/093b60fd0557804c8ba0cbf1453da22f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/093b60fd0557804c8ba0cbf1453da22f-Reviews.html", "metareview": "", "pdf_size": 1833564, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14339758160135738131&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "National Institute of Mental Health; National Institute of Mental Health; Massachusetts Institute of Technology; National Institute of Mental Health; Massachusetts Institute of Technology; National Institute of Mental Health; National Institute of Mental Health; National Institute of Mental Health", "aff_domain": "nih.gov;nih.gov;mit.edu;nih.gov;mit.edu;nih.gov;nih.gov;nih.gov", "email": "nih.gov;nih.gov;mit.edu;nih.gov;mit.edu;nih.gov;nih.gov;nih.gov", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/093b60fd0557804c8ba0cbf1453da22f-Abstract.html", "aff_unique_index": "0;0;1;0;1;0;0;0", "aff_unique_norm": "National Institute of Mental Health;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nimh.nih.gov;https://web.mit.edu", "aff_unique_abbr": "NIMH;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Distributionally Robust Graphical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11798", "id": "11798", "author_site": "Rizal Fathony, Ashkan Rezaei, Mohammad Ali Bashiri, Xinhua Zhang, Brian Ziebart", "author": "Rizal Fathony; Ashkan Rezaei; Mohammad Ali Bashiri; Xinhua Zhang; Brian Ziebart", "abstract": "In many structured prediction problems, complex relationships between variables are compactly defined using graphical structures. The most prevalent graphical prediction methods---probabilistic graphical models and large margin methods---have their own distinct strengths but also possess significant drawbacks. Conditional random fields (CRFs) are Fisher consistent, but they do not permit integration of customized loss metrics into their learning process. Large-margin models, such as structured support vector machines (SSVMs), have the flexibility to incorporate customized loss metrics, but lack Fisher consistency guarantees. 
We present adversarial graphical models (AGM), a distributionally robust approach for constructing a predictor that performs robustly for a class of data distributions defined using a graphical structure. Our approach enjoys both the flexibility of incorporating customized loss metrics into its design as well as the statistical guarantee of Fisher consistency. We present exact learning and prediction algorithms for AGM with time complexity similar to existing graphical models and show the practical benefits of our approach with experiments.", "bibtex": "@inproceedings{NEURIPS2018_79a3308b,\n author = {Fathony, Rizal and Rezaei, Ashkan and Bashiri, Mohammad Ali and Zhang, Xinhua and Ziebart, Brian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributionally Robust Graphical Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/79a3308b13cd31f096d8a4a34f96b66b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/79a3308b13cd31f096d8a4a34f96b66b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/79a3308b13cd31f096d8a4a34f96b66b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/79a3308b13cd31f096d8a4a34f96b66b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/79a3308b13cd31f096d8a4a34f96b66b-Reviews.html", "metareview": "", "pdf_size": 460890, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12961552134083718493&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/79a3308b13cd31f096d8a4a34f96b66b-Abstract.html" }, { "title": "Diverse Ensemble Evolution: Curriculum Data-Model Marriage", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11574", "id": "11574", "author_site": "Tianyi Zhou, Shengjie Wang, Jeffrey A Bilmes", "author": "Tianyi Zhou; Shengjie Wang; Jeff A. Bilmes", "abstract": "We study a new method (``Diverse Ensemble Evolution (DivE$^2$)'') to train an ensemble of machine learning models that assigns data to models at each training epoch based on each model's current expertise and an intra- and inter-model diversity reward. DivE$^2$ schedules, over the course of training epochs, the relative importance of these characteristics; it starts by selecting easy samples for each model, and then gradually adjusts towards the models having specialized and complementary expertise on subsets of the training data, thereby encouraging high accuracy of the ensemble. We utilize an intra-model diversity term on data assigned to each model, and an inter-model diversity term on data assigned to pairs of models, to penalize both within-model and cross-model redundancy. We formulate the data-model marriage problem as a generalized bipartite matching, represented as submodular maximization subject to two matroid constraints. DivE$^2$ solves a sequence of continuous-combinatorial optimizations with slowly varying objectives and constraints. The combinatorial part handles the data-model marriage while the continuous part updates model parameters based on the assignments. 
In experiments, DivE$^2$ outperforms other ensemble training methods under a variety of model aggregation techniques, while also maintaining competitive efficiency.", "bibtex": "@inproceedings{NEURIPS2018_3070e6ad,\n author = {Zhou, Tianyi and Wang, Shengjie and Bilmes, Jeff A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Diverse Ensemble Evolution: Curriculum Data-Model Marriage},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3070e6addcd702cb58de5d7897bfdae1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3070e6addcd702cb58de5d7897bfdae1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3070e6addcd702cb58de5d7897bfdae1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3070e6addcd702cb58de5d7897bfdae1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3070e6addcd702cb58de5d7897bfdae1-Reviews.html", "metareview": "", "pdf_size": 2360712, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=168021665770244419&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Depts. of Computer Science and Engineering, and Electrical and Computer Engineering, University of Washington, Seattle; Depts. of Computer Science and Engineering, and Electrical and Computer Engineering, University of Washington, Seattle; Depts. of Computer Science and Engineering, and Electrical and Computer Engineering, University of Washington, Seattle", "aff_domain": "uw.edu;uw.edu;uw.edu", "email": "uw.edu;uw.edu;uw.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3070e6addcd702cb58de5d7897bfdae1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Diversity-Driven Exploration Strategy for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11992", "id": "11992", "author_site": "Zhang-Wei Hong, Tzu-Yun Shann, Shih-Yang Su, Yi-Hsiang Chang, Tsu-Jui Fu, Chun-Yi Lee", "author": "Zhang-Wei Hong; Tzu-Yun Shann; Shih-Yang Su; Yi-Hsiang Chang; Tsu-Jui Fu; Chun-Yi Lee", "abstract": "Efficient exploration remains a challenging research problem in reinforcement learning, especially when an environment contains large state spaces, deceptive local optima, or sparse rewards.\nTo tackle this problem, we present a diversity-driven approach for exploration, which can be easily combined with both off- and on-policy reinforcement learning algorithms. We show that by simply adding a distance measure to the loss function, the proposed methodology significantly enhances an agent's exploratory behaviors, thus preventing the policy from being trapped in local optima. We further propose an adaptive scaling method for stabilizing the learning process. We demonstrate the effectiveness of our method in huge 2D gridworlds and a variety of benchmark environments, including Atari 2600 and MuJoCo. 
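A minimal sketch of the loss modification this abstract describes, assuming the distance measure is a mean-squared distance between the current policy's and prior policies' action distributions (the abstract leaves the choice open, and all names below are illustrative):

```python
# Illustrative diversity-augmented loss: the usual RL loss minus a scaled
# average distance between the current policy and a buffer of prior
# policies. Maximizing the distance term encourages exploration.
import torch

def diversity_augmented_loss(rl_loss, policy, prior_policies, states, alpha=0.1):
    probs = policy(states)  # (batch, n_actions) action probabilities
    distance = 0.0
    for prior in prior_policies:
        with torch.no_grad():
            prior_probs = prior(states)
        distance = distance + ((probs - prior_probs) ** 2).mean()
    distance = distance / max(len(prior_policies), 1)
    # alpha is fixed here; the paper adapts this scale during training.
    return rl_loss - alpha * distance
```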
Experimental results show that our method outperforms baseline approaches in most tasks in terms of mean scores and exploration efficiency.", "bibtex": "@inproceedings{NEURIPS2018_a2802cad,\n author = {Hong, Zhang-Wei and Shann, Tzu-Yun and Su, Shih-Yang and Chang, Yi-Hsiang and Fu, Tsu-Jui and Lee, Chun-Yi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Diversity-Driven Exploration Strategy for Deep Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a2802cade04644083dcde1c8c483ed9a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a2802cade04644083dcde1c8c483ed9a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a2802cade04644083dcde1c8c483ed9a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a2802cade04644083dcde1c8c483ed9a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a2802cade04644083dcde1c8c483ed9a-Reviews.html", "metareview": "", "pdf_size": 5784949, "gs_citation": 153, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10905735139638962929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, National Tsing Hua University; Department of Computer Science, National Tsing Hua University; Department of Computer Science, National Tsing Hua University; Department of Computer Science, National Tsing Hua University; Department of Computer Science, National Tsing Hua University; Department of Computer Science, National Tsing Hua University", "aff_domain": "gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw", "email": "gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw;gapp.nthu.edu.tw", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a2802cade04644083dcde1c8c483ed9a-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "National Tsing Hua University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.nthu.edu.tw", "aff_unique_abbr": "NTHU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Do Less, Get More: Streaming Submodular Maximization with Subsampling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11095", "id": "11095", "author_site": "Moran Feldman, Amin Karbasi, Ehsan Kazemi", "author": "Moran Feldman; Amin Karbasi; Ehsan Kazemi", "abstract": "In this paper, we develop the first one-pass streaming algorithm for submodular maximization that does not evaluate the entire stream even once. By carefully subsampling each element of the data stream, our algorithm enjoys the tightest approximation guarantees in various settings while having the smallest memory footprint and requiring the lowest number of function evaluations. 
More specifically, for a monotone submodular function and a $p$-matchoid constraint, our randomized algorithm achieves a $4p$ approximation ratio (in expectation) with $O(k)$ memory and $O(km/p)$ queries per element ($k$ is the size of the largest feasible solution and $m$ is the number of matroids used to define the constraint). For the non-monotone case, our approximation ratio increases only slightly to $4p+2-o(1)$. To the best of our knowledge, our algorithm is the first that combines the benefits of streaming and subsampling in a novel way in order to truly scale submodular maximization to massive machine learning problems. To showcase its practicality, we empirically evaluated the performance of our algorithm on a video summarization application and observed that it outperforms the state-of-the-art algorithm by up to fifty-fold while maintaining practically the same utility. We also evaluated the scalability of our algorithm on a large dataset of Uber pick-up locations.", "bibtex": "@inproceedings{NEURIPS2018_d1f255a3,\n author = {Feldman, Moran and Karbasi, Amin and Kazemi, Ehsan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Do Less, Get More: Streaming Submodular Maximization with Subsampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d1f255a373a3cef72e03aa9d980c7eca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d1f255a373a3cef72e03aa9d980c7eca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d1f255a373a3cef72e03aa9d980c7eca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d1f255a373a3cef72e03aa9d980c7eca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d1f255a373a3cef72e03aa9d980c7eca-Reviews.html", "metareview": "", "pdf_size": 987747, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1619479986901381663&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Open University of Israel; Yale University; Yale University", "aff_domain": "openu.ac.il;yale.edu;yale.edu", "email": "openu.ac.il;yale.edu;yale.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d1f255a373a3cef72e03aa9d980c7eca-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Open University of Israel;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.openu.ac.il;https://www.yale.edu", "aff_unique_abbr": "OUI;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Does mitigating ML's impact disparity require treatment disparity?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11778", "id": "11778", "author_site": "Zachary Lipton, Julian McAuley, Alexandra Chouldechova", "author": "Zachary Lipton; Julian McAuley; Alexandra Chouldechova", "abstract": "Following precedent in employment discrimination law, two notions of disparity are widely discussed in papers on fairness and ML. Algorithms exhibit treatment disparity if they formally treat members of protected subgroups differently;\nalgorithms exhibit impact disparity when outcomes differ across subgroups (even unintentionally). 
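Returning to the "Do Less, Get More" record above: a drastically simplified sketch of its subsample-then-insert idea, specialized (as an assumption) to a single cardinality constraint $k$ rather than a general $p$-matchoid; the actual algorithm uses careful thresholds and exchange steps that are omitted here.

```python
# Crude sketch: skip most stream elements without ever querying f, and
# greedily insert the survivors while the budget k allows. f is a monotone
# submodular set function taking a list of elements.
import random

def subsampled_streaming_max(stream, f, k, q=0.5):
    S = []
    for e in stream:
        if random.random() > q:  # subsampling: most elements cost nothing
            continue
        if len(S) < k:
            gain = f(S + [e]) - f(S)  # one marginal-value query
            if gain > 0:
                S.append(e)
    return S
```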
Naturally, we can achieve impact parity through purposeful treatment disparity. One line of papers aims to reconcile the two parities by proposing disparate learning processes (DLPs). Here, the sensitive feature is used during training but a group-blind classifier is produced. In this paper, we show that: (i) when sensitive and (nominally) nonsensitive features are correlated, DLPs will indirectly implement treatment disparity, undermining the policy desiderata they are designed to address; (ii) when group membership is partly revealed by other features, DLPs induce within-class discrimination; and (iii) in general, DLPs provide suboptimal trade-offs between accuracy and impact parity. Experimental results on several real-world datasets highlight the practical consequences of applying DLPs.", "bibtex": "@inproceedings{NEURIPS2018_8e038477,\n author = {Lipton, Zachary and McAuley, Julian and Chouldechova, Alexandra},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Does mitigating ML\\textquotesingle s impact disparity require treatment disparity?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8e0384779e58ce2af40eb365b318cc32-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8e0384779e58ce2af40eb365b318cc32-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8e0384779e58ce2af40eb365b318cc32-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8e0384779e58ce2af40eb365b318cc32-Reviews.html", "metareview": "", "pdf_size": 566805, "gs_citation": 270, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11454641024378276086&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Carnegie Mellon University; University of California, San Diego", "aff_domain": "cmu.edu;cmu.edu;cs.ucsd.edu", "email": "cmu.edu;cmu.edu;cs.ucsd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8e0384779e58ce2af40eb365b318cc32-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Carnegie Mellon University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ucsd.edu", "aff_unique_abbr": "CMU;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Domain Adaptation by Using Causal Inference to Predict Invariant Conditional Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12025", "id": "12025", "author_site": "Sara Magliacane, Thijs van Ommen, Tom Claassen, Stephan Bongers, Philip Versteeg, Joris Mooij", "author": "Sara Magliacane; Thijs van Ommen; Tom Claassen; Stephan Bongers; Philip Versteeg; Joris M. Mooij", "abstract": "An important goal common to domain adaptation and causal inference is to make accurate predictions when the distributions for the source (or training) domain(s) and target (or test) domain(s) differ. In many cases, these different distributions can be modeled as different contexts of a single underlying system, in which each distribution corresponds to a different perturbation of the system, or in causal terms, an intervention. 
We focus on a class of such causal domain adaptation problems, where data for one or more source domains are given, and the task is to predict the distribution of a certain target variable from measurements of other variables in one or more target domains. We propose an approach for solving these problems that exploits causal inference and does not rely on prior knowledge of the causal graph, the type of interventions or the intervention targets. We demonstrate our approach by evaluating a possible implementation on simulated and real world data.", "bibtex": "@inproceedings{NEURIPS2018_39e98420,\n author = {Magliacane, Sara and van Ommen, Thijs and Claassen, Tom and Bongers, Stephan and Versteeg, Philip and Mooij, Joris M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Domain Adaptation by Using Causal Inference to Predict Invariant Conditional Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/39e98420b5e98bfbdc8a619bef7b8f61-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/39e98420b5e98bfbdc8a619bef7b8f61-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/39e98420b5e98bfbdc8a619bef7b8f61-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/39e98420b5e98bfbdc8a619bef7b8f61-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/39e98420b5e98bfbdc8a619bef7b8f61-Reviews.html", "metareview": "", "pdf_size": 404431, "gs_citation": 267, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3967372382720766256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "MIT-IBM Watson AI Lab, IBM Research\u2217; University of Amsterdam; Radboud University Nijmegen; University of Amsterdam; University of Amsterdam; University of Amsterdam", "aff_domain": "gmail.com;gmail.com;cs.ru.nl;gmail.com;uva.nl;uva.nl", "email": "gmail.com;gmail.com;cs.ru.nl;gmail.com;uva.nl;uva.nl", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/39e98420b5e98bfbdc8a619bef7b8f61-Abstract.html", "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "IBM;University of Amsterdam;Radboud University", "aff_unique_dep": "AI Lab;;", "aff_unique_url": "https://www.ibmwatsonai.org/;https://www.uva.nl;https://www.ru.nl/", "aff_unique_abbr": "MIT-IBM AI Lab;UvA;RU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Nijmegen", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "United States;Netherlands" }, { "title": "Domain-Invariant Projection Learning for Zero-Shot Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11122", "id": "11122", "author_site": "An Zhao, Mingyu Ding, Jiechao Guan, Zhiwu Lu, Tao Xiang, Ji-Rong Wen", "author": "An Zhao; Mingyu Ding; Jiechao Guan; Zhiwu Lu; Tao Xiang; Ji-Rong Wen", "abstract": "Zero-shot learning (ZSL) aims to recognize unseen object classes without any training samples, which can be regarded as a form of transfer learning from seen classes to unseen ones. This is made possible by learning a projection between a feature space and a semantic space (e.g. attribute space). Key to ZSL is thus to learn a projection function that is robust against the often large domain gap between the seen and unseen classes. 
In this paper, we propose a novel ZSL model termed domain-invariant projection learning (DIPL). Our model has two novel components: (1) A domain-invariant feature self-reconstruction task is introduced to the seen/unseen class data, resulting in a simple linear formulation that casts ZSL into a min-min optimization problem. Solving the problem is non-trivial, and a novel iterative algorithm is formulated as the solver, with rigorous theoretical analysis of the algorithm provided. (2) To further align the two domains via the learned projection, shared semantic structure among seen and unseen classes is explored via forming superclasses in the semantic space. Extensive experiments show that our model outperforms the state-of-the-art alternatives by significant margins.", "bibtex": "@inproceedings{NEURIPS2018_ccb1d45f,\n author = {Zhao, An and Ding, Mingyu and Guan, Jiechao and Lu, Zhiwu and Xiang, Tao and Wen, Ji-Rong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Domain-Invariant Projection Learning for Zero-Shot Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ccb1d45fb76f7c5a0bf619f979c6cf36-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ccb1d45fb76f7c5a0bf619f979c6cf36-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ccb1d45fb76f7c5a0bf619f979c6cf36-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ccb1d45fb76f7c5a0bf619f979c6cf36-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ccb1d45fb76f7c5a0bf619f979c6cf36-Reviews.html", "metareview": "", "pdf_size": 386721, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6996871515256026149&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Beijing Key Laboratory of Big Data Management and Analysis Methods, School of Information, Renmin University of China; Beijing Key Laboratory of Big Data Management and Analysis Methods, School of Information, Renmin University of China; Beijing Key Laboratory of Big Data Management and Analysis Methods, School of Information, Renmin University of China; Beijing Key Laboratory of Big Data Management and Analysis Methods, School of Information, Renmin University of China + School of EECS, Queen Mary University of London + Samsung AI Centre, Cambridge; School of EECS, Queen Mary University of London + Samsung AI Centre, Cambridge; Beijing Key Laboratory of Big Data Management and Analysis Methods, School of Information, Renmin University of China", "aff_domain": "gmail.com; ; ;qmul.ac.uk; ; ", "email": "gmail.com; ; ;qmul.ac.uk; ; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ccb1d45fb76f7c5a0bf619f979c6cf36-Abstract.html", "aff_unique_index": "0;0;0;0+1+2;1+2;0", "aff_unique_norm": "Renmin University of China;Queen Mary University of London;Samsung", "aff_unique_dep": "School of Information;School of EECS;AI Centre", "aff_unique_url": "http://www.ruc.edu.cn;https://www.qmul.ac.uk;https://www.samsung.com/global/campaign/ai-research-centre/", "aff_unique_abbr": "RUC;QMUL;SAC", "aff_campus_unique_index": "0;0;0;0+1+2;1+2;0", "aff_campus_unique": "Beijing;London;Cambridge", "aff_country_unique_index": "0;0;0;0+1+1;1+1;0", "aff_country_unique": 
"China;United Kingdom" }, { "title": "Doubly Robust Bayesian Inference for Non-Stationary Streaming Data with $\\beta$-Divergences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11034", "id": "11034", "author_site": "Jeremias Knoblauch, Jack E Jewson, Theodoros Damoulas", "author": "Jeremias Knoblauch; Jack E Jewson; Theodoros Damoulas", "abstract": "We present the very first robust Bayesian Online Changepoint Detection algorithm through General Bayesian Inference (GBI) with $\\beta$-divergences. The resulting inference procedure is doubly robust for both the predictive and the changepoint (CP) posterior, with linear time and constant space complexity. We provide a construction for exponential models and demonstrate it on the Bayesian Linear Regression model. In so doing, we make two additional contributions: Firstly, we make GBI scalable using Structural Variational approximations that are exact as $\\beta \\to 0$. Secondly, we give a principled way of choosing the divergence parameter $\\beta$ by minimizing expected predictive loss on-line. Reducing False Discovery Rates of \\CPs from up to 99\\% to 0\\% on real world data, this offers the state of the art.", "bibtex": "@inproceedings{NEURIPS2018_a3f390d8,\n author = {Knoblauch, Jeremias and Jewson, Jack E and Damoulas, Theodoros},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Doubly Robust Bayesian Inference for Non-Stationary Streaming Data with \\textbackslash beta-Divergences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a3f390d88e4c41f2747bfa2f1b5f87db-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a3f390d88e4c41f2747bfa2f1b5f87db-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a3f390d88e4c41f2747bfa2f1b5f87db-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a3f390d88e4c41f2747bfa2f1b5f87db-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a3f390d88e4c41f2747bfa2f1b5f87db-Reviews.html", "metareview": "", "pdf_size": 1368303, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5758442039319568302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "The Alan Turing Institute + Department of Statistics, University of Warwick; Department of Statistics, University of Warwick; The Alan Turing Institute + Department of Computer Science & Department of Statistics, University of Warwick", "aff_domain": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk", "email": "warwick.ac.uk;warwick.ac.uk;warwick.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a3f390d88e4c41f2747bfa2f1b5f87db-Abstract.html", "aff_unique_index": "0+1;1;0+1", "aff_unique_norm": "Alan Turing Institute;University of Warwick", "aff_unique_dep": ";Department of Statistics", "aff_unique_url": "https://www.turing.ac.uk;https://warwick.ac.uk", "aff_unique_abbr": "ATI;Warwick", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "DropBlock: A regularization method for convolutional networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12014", "id": "12014", 
"author_site": "Golnaz Ghiasi, Tsung-Yi Lin, Quoc V Le", "author": "Golnaz Ghiasi; Tsung-Yi Lin; Quoc V Le", "abstract": "Deep neural networks often work well when they are over-parameterized and trained with a massive amount of noise and regularization, such as weight decay and dropout. Although dropout is widely used as a regularization technique for fully connected layers, it is often less effective for convolutional layers. This lack of success of dropout for convolutional layers is perhaps due to the fact that activation units in convolutional layers are spatially correlated so information can still flow through convolutional networks despite dropout. Thus a structured form of dropout is needed to regularize convolutional networks. In this paper, we introduce DropBlock, a form of structured dropout, where units in a contiguous region of a feature map are dropped together. We found that applying DropbBlock in skip connections in addition to the convolution layers increases the accuracy. Also, gradually increasing number of dropped units during training leads to better accuracy and more robust to hyperparameter choices. Extensive experiments show that DropBlock works better than dropout in regularizing convolutional networks.\n On ImageNet classification, ResNet-50 architecture with DropBlock achieves $78.13\\%$ accuracy, which is more than $1.6\\%$ improvement on the baseline. On COCO detection, DropBlock improves Average Precision of RetinaNet from $36.8\\%$ to $38.4\\%$.", "bibtex": "@inproceedings{NEURIPS2018_7edcfb2d,\n author = {Ghiasi, Golnaz and Lin, Tsung-Yi and Le, Quoc V},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DropBlock: A regularization method for convolutional networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7edcfb2d8f6a659ef4cd1e6c9b6d7079-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7edcfb2d8f6a659ef4cd1e6c9b6d7079-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7edcfb2d8f6a659ef4cd1e6c9b6d7079-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7edcfb2d8f6a659ef4cd1e6c9b6d7079-Reviews.html", "metareview": "", "pdf_size": 1242109, "gs_citation": 1300, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6328166364135944716&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7edcfb2d8f6a659ef4cd1e6c9b6d7079-Abstract.html" }, { "title": "DropMax: Adaptive Variational Softmax", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11113", "id": "11113", "author_site": "Hae Beom Lee, Juho Lee, Saehoon Kim, Eunho Yang, Sung Ju Hwang", "author": "Hae Beom Lee; Juho Lee; Saehoon Kim; Eunho Yang; Sung Ju Hwang", "abstract": "We propose DropMax, a stochastic version of softmax classifier which at each iteration drops non-target classes according to dropout probabilities adaptively decided for each instance. Specifically, we overlay binary masking variables over class output probabilities, which are input-adaptively learned via variational inference. 
This stochastic regularization has the effect of building an ensemble classifier out of exponentially many classifiers with different decision boundaries. Moreover, the learning of dropout rates for non-target classes on each instance allows the classifier to focus more on classification against the most confusing classes. We validate our model on multiple public datasets for classification, on which it obtains significantly improved accuracy over the regular softmax classifier and other baselines. Further analysis of the learned dropout probabilities shows that our model indeed selects confusing classes more often when it performs classification.", "bibtex": "@inproceedings{NEURIPS2018_389bc7bb,\n author = {Lee, Hae Beom and Lee, Juho and Kim, Saehoon and Yang, Eunho and Hwang, Sung Ju},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DropMax: Adaptive Variational Softmax},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/389bc7bb1e1c2a5e7e147703232a88f6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/389bc7bb1e1c2a5e7e147703232a88f6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/389bc7bb1e1c2a5e7e147703232a88f6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/389bc7bb1e1c2a5e7e147703232a88f6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/389bc7bb1e1c2a5e7e147703232a88f6-Reviews.html", "metareview": "", "pdf_size": 1268126, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6113755016125254061&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/389bc7bb1e1c2a5e7e147703232a88f6-Abstract.html" }, { "title": "Dropping Symmetry for Fast Symmetric Nonnegative Matrix Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11504", "id": "11504", "author_site": "Zhihui Zhu, Xiao Li, Kai Liu, Qiuwei Li", "author": "Zhihui Zhu; Xiao Li; Kai Liu; Qiuwei Li", "abstract": "Symmetric nonnegative matrix factorization (NMF)---a special but important class of the general NMF---is demonstrated to be useful for data analysis and in particular for various clustering tasks. Unfortunately, designing fast algorithms for Symmetric NMF is not as easy as for the nonsymmetric counterpart, the latter admitting the splitting property that allows efficient alternating-type algorithms. To overcome this issue, we transform the symmetric NMF into a nonsymmetric one, and then adopt ideas from state-of-the-art algorithms for nonsymmetric NMF to design fast algorithms for solving symmetric NMF. We rigorously establish that solving the nonsymmetric reformulation returns a solution for symmetric NMF and then apply fast alternating-based algorithms for the corresponding reformulated problem. Furthermore, we show these fast algorithms admit a strong convergence guarantee in the sense that the generated sequence is convergent at least at a sublinear rate and converges globally to a critical point of the symmetric NMF. 
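A compact sketch of that symmetry-dropping idea: replace $\min_{U\ge 0} \|X - UU^\top\|_F^2$ with a penalized nonsymmetric problem $\min_{U,V\ge 0} \|X - UV^\top\|_F^2 + \lambda\|U - V\|_F^2$ and alternate simple projected-gradient steps. The paper's actual solvers are faster alternating-type algorithms; the step size and penalty below are illustrative only.

```python
# Penalized nonsymmetric surrogate for symmetric NMF, solved by naive
# projected gradient descent; at a solution U and V (approximately) agree.
import numpy as np

def symmetric_nmf(X, r, lam=1.0, step=1e-3, iters=500, seed=0):
    rng = np.random.default_rng(seed)
    n = X.shape[0]
    U, V = rng.random((n, r)), rng.random((n, r))
    for _ in range(iters):
        R = U @ V.T - X                   # residual of the current fit
        gU = R @ V + lam * (U - V)        # gradient w.r.t. U (up to a factor)
        gV = R.T @ U + lam * (V - U)      # gradient w.r.t. V
        U = np.maximum(U - step * gU, 0)  # projection keeps factors >= 0
        V = np.maximum(V - step * gV, 0)
    return U, V  # U @ V.T approximates X, with U close to V
```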
We conduct experiments on both synthetic data and image clustering to support our results.", "bibtex": "@inproceedings{NEURIPS2018_d9ff90f4,\n author = {Zhu, Zhihui and Li, Xiao and Liu, Kai and Li, Qiuwei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dropping Symmetry for Fast Symmetric Nonnegative Matrix Factorization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d9ff90f4000eacd3a6c9cb27f78994cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d9ff90f4000eacd3a6c9cb27f78994cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d9ff90f4000eacd3a6c9cb27f78994cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d9ff90f4000eacd3a6c9cb27f78994cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d9ff90f4000eacd3a6c9cb27f78994cf-Reviews.html", "metareview": "", "pdf_size": 715951, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3937135106028652021&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Mathematical Institute for Data Science, Johns Hopkins University, Baltimore, MD, USA; Department of Electronic Engineering, The Chinese University of Hong Kong, Shatin, NT, Hong Kong; Department of Computer Science, Colorado School of Mines, Golden, CO, USA; Department of Electrical Engineering, Colorado School of Mines, Golden, CO, USA", "aff_domain": "jhu.edu;ee.cuhk.edu.hk;mines.edu;mines.edu", "email": "jhu.edu;ee.cuhk.edu.hk;mines.edu;mines.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d9ff90f4000eacd3a6c9cb27f78994cf-Abstract.html", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Johns Hopkins University;Chinese University of Hong Kong;Colorado School of Mines", "aff_unique_dep": "Mathematical Institute for Data Science;Department of Electronic Engineering;Department of Computer Science", "aff_unique_url": "https://www.jhu.edu;https://www.cuhk.edu.hk;https://www.mines.edu", "aff_unique_abbr": "JHU;CUHK;CSM", "aff_campus_unique_index": "0;1;2;2", "aff_campus_unique": "Baltimore;Hong Kong SAR;Golden", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Dual Policy Iteration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11680", "id": "11680", "author_site": "Wen Sun, Geoffrey Gordon, Byron Boots, J. Bagnell", "author": "Wen Sun; Geoffrey J. Gordon; Byron Boots; J. Bagnell", "abstract": "Recently, a novel class of Approximate Policy Iteration (API) algorithms has demonstrated impressive practical performance (e.g., ExIt from [1], AlphaGo-Zero from [2]). This new family of algorithms maintains, and alternately optimizes, two policies: a fast, reactive policy (e.g., a deep neural network) deployed at test time, and a slow, non-reactive policy (e.g., Tree Search), that can plan multiple steps ahead. The reactive policy is updated under supervision from the non-reactive policy, while the non-reactive policy is improved with guidance from the reactive policy. In this work we study this Dual Policy Iteration (DPI) strategy in an alternating optimization framework and provide a convergence analysis that extends existing API theory. 
We also develop a special instance of this framework which reduces the update of non-reactive policies to model-based optimal control using learned local models, and provides a theoretically sound way of unifying model-free and model-based RL approaches with unknown dynamics. We demonstrate the efficacy of our approach on various continuous control Markov Decision Processes.", "bibtex": "@inproceedings{NEURIPS2018_15e122e8,\n author = {Sun, Wen and Gordon, Geoffrey J and Boots, Byron and Bagnell, J.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Policy Iteration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/15e122e839dfdaa7ce969536f94aecf6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/15e122e839dfdaa7ce969536f94aecf6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/15e122e839dfdaa7ce969536f94aecf6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/15e122e839dfdaa7ce969536f94aecf6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/15e122e839dfdaa7ce969536f94aecf6-Reviews.html", "metareview": "", "pdf_size": 2481641, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7140809691676504820&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "School of Computer Science, Carnegie Mellon University, USA; School of Computer Science, Carnegie Mellon University, USA; College of Computing, Georgia Institute of Technology, USA; Aurora Innovation, USA", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cc.gatech.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cc.gatech.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/15e122e839dfdaa7ce969536f94aecf6-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Carnegie Mellon University;Georgia Institute of Technology;Aurora Innovation", "aff_unique_dep": "School of Computer Science;College of Computing;", "aff_unique_url": "https://www.cmu.edu;https://www.gatech.edu;https://aurora.tech", "aff_unique_abbr": "CMU;Georgia Tech;Aurora", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Dual Principal Component Pursuit: Improved Analysis and Efficient Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11228", "id": "11228", "author_site": "Zhihui Zhu, Yifan Wang, Daniel Robinson, Daniel Naiman, Ren\u00e9 Vidal, Manolis Tsakiris", "author": "Zhihui Zhu; Yifan Wang; Daniel Robinson; Daniel Naiman; Ren\u00e9 Vidal; Manolis Tsakiris", "abstract": "Recent methods for learning a linear subspace from data corrupted by outliers are based on convex L1 and nuclear norm optimization and require the dimension of the subspace and the number of outliers to be sufficiently small [27]. In sharp contrast, the recently proposed Dual Principal Component Pursuit (DPCP) method [22] can provably handle subspaces of high dimension by solving a non-convex L1 optimization problem on the sphere. However, its geometric analysis is based on quantities that are difficult to interpret and are not amenable to statistical analysis. 
In this paper we provide a refined geometric analysis and a new statistical analysis that show that DPCP can tolerate as many outliers as the square of the number of inliers, thus improving upon other provably correct robust PCA methods. We also propose a scalable Projected Sub-Gradient Descent method (DPCP-PSGD) for solving the DPCP problem and show it admits linear convergence even though the underlying optimization problem is non-convex and non-smooth. Experiments on road plane detection from 3D point cloud data demonstrate that DPCP-PSGD can be more efficient than the traditional RANSAC algorithm, which is one of the most popular methods for such computer vision applications.", "bibtex": "@inproceedings{NEURIPS2018_af21d0c9,\n author = {Zhu, Zhihui and Wang, Yifan and Robinson, Daniel and Naiman, Daniel and Vidal, Ren\\'{e} and Tsakiris, Manolis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Principal Component Pursuit: Improved Analysis and Efficient Algorithms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/af21d0c97db2e27e13572cbf59eb343d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/af21d0c97db2e27e13572cbf59eb343d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/af21d0c97db2e27e13572cbf59eb343d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/af21d0c97db2e27e13572cbf59eb343d-Reviews.html", "metareview": "", "pdf_size": 15183241, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6518498734752325665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "MINDS, Johns Hopkins University; SIST, ShanghaiTech University; AMS, Johns Hopkins University; AMS, Johns Hopkins University; MINDS, Johns Hopkins University; SIST, ShanghaiTech University", "aff_domain": "jhu.edu;shanghaitech.edu.cn;jhu.edu;jhu.edu;jhu.edu;shanghaitech.edu.cn", "email": "jhu.edu;shanghaitech.edu.cn;jhu.edu;jhu.edu;jhu.edu;shanghaitech.edu.cn", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/af21d0c97db2e27e13572cbf59eb343d-Abstract.html", "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Johns Hopkins University;ShanghaiTech University", "aff_unique_dep": "MINDS;School of Information Science and Technology (SIST)", "aff_unique_url": "https://www.jhu.edu;http://www.shanghaitech.edu.cn", "aff_unique_abbr": "JHU;ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Dual Swap Disentangling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11573", "id": "11573", "author_site": "Zunlei Feng, Xinchao Wang, Chenglong Ke, An-Xiang Zeng, Dacheng Tao, Mingli Song", "author": "Zunlei Feng; Xinchao Wang; Chenglong Ke; An-Xiang Zeng; Dacheng Tao; Mingli Song", "abstract": "Learning interpretable disentangled representations is a crucial yet challenging task. In this paper, we propose a weakly semi-supervised method, termed as Dual Swap Disentangling (DSD), for disentangling using both labeled and unlabeled data. 
Unlike conventional weakly supervised methods that rely on full annotations on the group of samples, we require only limited annotations on paired samples that indicate their shared attribute like the color. Our model takes the form of a dual autoencoder structure. To achieve disentangling using the labeled pairs, we follow a", "bibtex": "@inproceedings{NEURIPS2018_fdf1bc56,\n author = {Feng, Zunlei and Wang, Xinchao and Ke, Chenglong and Zeng, An-Xiang and Tao, Dacheng and Song, Mingli},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Swap Disentangling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fdf1bc5669e8ff5ba45d02fded729feb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fdf1bc5669e8ff5ba45d02fded729feb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fdf1bc5669e8ff5ba45d02fded729feb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fdf1bc5669e8ff5ba45d02fded729feb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fdf1bc5669e8ff5ba45d02fded729feb-Reviews.html", "metareview": "", "pdf_size": 977568, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9095937857315962499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Zhejiang University; Stevens Institute of Technology; Zhejiang University; Alibaba Group; University of Sydney; Zhejiang University", "aff_domain": "zju.edu.cn;stevens.edu;zju.edu.cn;taobao.com;sydney.edu.au;zju.edu.cn", "email": "zju.edu.cn;stevens.edu;zju.edu.cn;taobao.com;sydney.edu.au;zju.edu.cn", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fdf1bc5669e8ff5ba45d02fded729feb-Abstract.html", "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Zhejiang University;Stevens Institute of Technology;Alibaba Group;University of Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.stevens.edu;https://www.alibaba.com;https://www.sydney.edu.au", "aff_unique_abbr": "ZJU;SIT;Alibaba;USYD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0", "aff_country_unique": "China;United States;Australia" }, { "title": "Dynamic Network Model from Partial Observations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11936", "id": "11936", "author_site": "Elahe Ghalebi, Baharan Mirzasoleiman, Radu Grosu, Jure Leskovec", "author": "Elahe Ghalebi; Baharan Mirzasoleiman; Radu Grosu; Jure Leskovec", "abstract": "Can evolving networks be inferred and modeled without directly observing their nodes and edges? In many applications, the edges of a dynamic network might not be observed, but one can observe the dynamics of stochastic cascading processes (e.g., information diffusion, virus propagation) occurring over the unobserved network. While there have been efforts to infer networks based on such data, providing a generative probabilistic model that is able to identify the underlying time-varying network remains an open question. Here we consider the problem of inferring generative dynamic network models based on network cascade diffusion data. 
We propose a novel framework for providing a non-parametric dynamic network model, based on a mixture of coupled hierarchical Dirichlet processes, that is learned from data capturing cascade node infection times. Our approach allows us to infer the evolving community structure in networks and to obtain an explicit predictive distribution over the edges of the underlying network---including those that were not involved in transmission of any cascade, or are likely to appear in the future. We show the effectiveness of our approach using extensive experiments on synthetic as well as real-world networks.", "bibtex": "@inproceedings{NEURIPS2018_e1dc4bf1,\n author = {Ghalebi, Elahe and Mirzasoleiman, Baharan and Grosu, Radu and Leskovec, Jure},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dynamic Network Model from Partial Observations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e1dc4bf1f94e87fdfeb2d91ae3dc10ef-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e1dc4bf1f94e87fdfeb2d91ae3dc10ef-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e1dc4bf1f94e87fdfeb2d91ae3dc10ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e1dc4bf1f94e87fdfeb2d91ae3dc10ef-Reviews.html", "metareview": "", "pdf_size": 498726, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1043345695726214648&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "TU Wien; Stanford University; TU Wien; Stanford University", "aff_domain": "cps.tuwien.ac.at;cs.stanford.edu;tuwien.ac.at;cs.stanford.edu", "email": "cps.tuwien.ac.at;cs.stanford.edu;tuwien.ac.at;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e1dc4bf1f94e87fdfeb2d91ae3dc10ef-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Technische Universit\u00e4t Wien;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tuwien.ac.at;https://www.stanford.edu", "aff_unique_abbr": "TU Wien;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Austria;United States" }, { "title": "Early Stopping for Nonparametric Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11396", "id": "11396", "author_site": "Meimei Liu, Guang Cheng", "author": "Meimei Liu; Guang Cheng", "abstract": "Early stopping of iterative algorithms is an algorithmic regularization method to avoid over-fitting in estimation and classification. In this paper, we show that early stopping can also be applied to obtain minimax optimal testing in a general non-parametric setup. Specifically, a Wald-type test statistic is obtained based on an iterated estimate produced by functional gradient descent algorithms in a reproducing kernel Hilbert space. A notable contribution is to establish a ``sharp'' stopping rule: when the number of iterations achieves an optimal order, testing optimality is achievable; otherwise, testing optimality becomes impossible. As a by-product, a similar sharpness result is also derived for minimax optimal estimation under early stopping. 
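A minimal sketch of the kind of procedure this record describes, assuming plain kernel gradient descent on the squared loss and a norm-based Wald-type statistic; the paper's exact statistic, its calibration, and the sharp choice of the stopping time T are not reproduced here.

```python
# Kernel (functional) gradient descent on least squares, stopped after T
# steps; the squared empirical norm of the stopped iterate serves as an
# illustrative Wald-type test statistic for H0: f = 0.
import numpy as np

def early_stopped_statistic(K, y, eta=0.1, T=50):
    """K: (n, n) kernel matrix; y: (n,) responses."""
    n = len(y)
    f = np.zeros(n)                    # function values at the data points
    for _ in range(T):
        f = f - eta * K @ (f - y) / n  # one functional gradient step
    return n * np.mean(f ** 2)         # grows under alternatives
```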
All obtained results hold for various kernel classes, including Sobolev smoothness classes and Gaussian kernel classes.", "bibtex": "@inproceedings{NEURIPS2018_3d863b36,\n author = {Liu, Meimei and Cheng, Guang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Early Stopping for Nonparametric Testing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3d863b367aa379f71c7afc0c9cdca41d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3d863b367aa379f71c7afc0c9cdca41d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3d863b367aa379f71c7afc0c9cdca41d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3d863b367aa379f71c7afc0c9cdca41d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3d863b367aa379f71c7afc0c9cdca41d-Reviews.html", "metareview": "", "pdf_size": 1357325, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11825327118550117626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Statistical Science, Duke University; Department of Statistics, Purdue University", "aff_domain": "duke.edu;purdue.edu", "email": "duke.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3d863b367aa379f71c7afc0c9cdca41d-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Duke University;Purdue University", "aff_unique_dep": "Department of Statistical Science;Department of Statistics", "aff_unique_url": "https://www.duke.edu;https://www.purdue.edu", "aff_unique_abbr": "Duke;Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Efficient Algorithms for Non-convex Isotonic Regression through Submodular Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11028", "id": "11028", "author": "Francis Bach", "abstract": "We consider the minimization of submodular functions subject to ordering constraints. We show that this potentially non-convex optimization problem can be cast as a convex optimization problem on a space of uni-dimensional measures, with ordering constraints corresponding to first-order stochastic dominance. We propose new discretization schemes that lead to simple and efficient algorithms based on zero-th, first, or higher order oracles; these algorithms also lead to improvements without isotonic constraints. Finally, our experiments show that non-convex loss functions can be much more robust to outliers for isotonic regression, while still being solvable in polynomial time.", "bibtex": "@inproceedings{NEURIPS2018_6ea9ab1b,\n author = {Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Algorithms for Non-convex Isotonic Regression through Submodular Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6ea9ab1baa0efb9e19094440c317e21b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6ea9ab1baa0efb9e19094440c317e21b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6ea9ab1baa0efb9e19094440c317e21b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6ea9ab1baa0efb9e19094440c317e21b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6ea9ab1baa0efb9e19094440c317e21b-Reviews.html", "metareview": "", "pdf_size": 569378, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2802759581653455237&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6ea9ab1baa0efb9e19094440c317e21b-Abstract.html" }, { "title": "Efficient Anomaly Detection via Matrix Sketching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11773", "id": "11773", "author_site": "Vatsal Sharan, Parikshit Gopalan, Udi Wieder", "author": "Vatsal Sharan; Parikshit Gopalan; Udi Wieder", "abstract": "We consider the problem of finding anomalies in high-dimensional data using popular PCA based anomaly scores. The naive algorithms for computing these scores explicitly compute the PCA of the covariance matrix which uses space quadratic in the dimensionality of the data. We give the first streaming algorithms that use space that is linear or sublinear in the dimension. We prove general results showing that \\emph{any} sketch of a matrix that satisfies a certain operator norm guarantee can be used to approximate these scores. We instantiate these results with powerful matrix sketching techniques such as Frequent Directions and random projections to derive efficient and practical algorithms for these problems, which we validate over real-world data sets. Our main technical contribution is to prove matrix perturbation inequalities for operators arising in the computation of these measures.", "bibtex": "@inproceedings{NEURIPS2018_34adeb8e,\n author = {Sharan, Vatsal and Gopalan, Parikshit and Wieder, Udi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Anomaly Detection via Matrix Sketching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/34adeb8e3242824038aa65460a47c29e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/34adeb8e3242824038aa65460a47c29e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/34adeb8e3242824038aa65460a47c29e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/34adeb8e3242824038aa65460a47c29e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/34adeb8e3242824038aa65460a47c29e-Reviews.html", "metareview": "", "pdf_size": 791237, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10628166240291958501&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University\u2217; VMware Research; VMware Research", "aff_domain": "stanford.edu;vmware.com;vmware.com", "email": "stanford.edu;vmware.com;vmware.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/34adeb8e3242824038aa65460a47c29e-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;VMware, Inc.", "aff_unique_dep": ";VMware Research", "aff_unique_url": "https://www.stanford.edu;https://www.vmware.com/research.html", "aff_unique_abbr": "Stanford;VMware", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Convex Completion of Coupled Tensors using Coupled Nuclear Norms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11665", "id": "11665", "author_site": "Kishan Wimalawarne, Hiroshi Mamitsuka", "author": "Kishan Wimalawarne; Hiroshi Mamitsuka", "abstract": "Coupled norms have emerged as a convex method to solve coupled tensor completion. A limitation with coupled norms is that they only induce low-rankness using the multilinear rank of coupled tensors. In this paper, we introduce a new set of coupled norms known as coupled nuclear norms by constraining the CP rank of coupled tensors. We propose new coupled completion models using the coupled nuclear norms as regularizers, which can be optimized using computationally efficient optimization methods. We derive excess risk bounds for the proposed coupled completion models and show that the proposed norms lead to better performance. Through simulation and real-data experiments, we demonstrate that the proposed norms achieve better performance for coupled completion compared to existing coupled norms.", "bibtex": "@inproceedings{NEURIPS2018_12092a75,\n author = {Wimalawarne, Kishan and Mamitsuka, Hiroshi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Convex Completion of Coupled Tensors using Coupled Nuclear Norms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/12092a75caa75e4644fd2869f0b6c45a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/12092a75caa75e4644fd2869f0b6c45a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/12092a75caa75e4644fd2869f0b6c45a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/12092a75caa75e4644fd2869f0b6c45a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/12092a75caa75e4644fd2869f0b6c45a-Reviews.html", "metareview": "", "pdf_size": 145717, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2670598151914225435&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Bioinformatics Center, Kyoto University, Kyoto, Japan+Department of Computer Science, Aalto University, Espoo, Finland; Bioinformatics Center, Kyoto University, Kyoto, Japan+Department of Computer Science, Aalto University, Espoo, Finland", "aff_domain": "gmail.com;kuicr.kyoto-u.ac.jp", "email": "gmail.com;kuicr.kyoto-u.ac.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/12092a75caa75e4644fd2869f0b6c45a-Abstract.html", "aff_unique_index": "0+1;0+1", "aff_unique_norm": "Kyoto University;Aalto University", "aff_unique_dep": "Bioinformatics Center;Department of Computer Science", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.aalto.fi", "aff_unique_abbr": "Kyoto U;Aalto", "aff_campus_unique_index": "0+1;0+1", "aff_campus_unique": "Kyoto;Espoo", "aff_country_unique_index": "0+1;0+1", "aff_country_unique": "Japan;Finland" }, { "title": "Efficient Formal Safety Analysis of Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11616", "id": "11616", "author_site": "Shiqi Wang, Kexin Pei, Justin Whitehouse, Junfeng Yang, Suman Jana", "author": "Shiqi Wang; Kexin Pei; Justin Whitehouse; Junfeng Yang; Suman Jana", "abstract": "Neural networks are increasingly deployed in real-world safety-critical domains such as autonomous driving, aircraft collision avoidance, and malware detection. However, these networks have been shown to often mispredict on inputs with minor adversarial or even accidental perturbations. Consequences of such errors can be disastrous and even potentially fatal, as shown by the recent Tesla autopilot crash. Thus, there is an urgent need for formal analysis systems that can rigorously check neural networks for violations of different safety properties such as robustness against adversarial perturbations within a certain L-norm of a given image. An effective safety analysis system for a neural network must be able to either ensure that a safety property is satisfied by the network or find a counterexample, i.e., an input for which the network will violate the property. Unfortunately, most existing techniques for performing such analysis struggle to scale beyond very small networks, and the ones that can scale to larger networks suffer from high false positive rates and cannot produce concrete counterexamples in case of a property violation. In this paper, we present a new efficient approach for rigorously checking different safety properties of neural networks that significantly outperforms existing approaches by multiple orders of magnitude.
Our approach can check different safety properties and find concrete counterexamples for networks that are 10x larger than the ones supported by existing analysis techniques. We believe that our approach to estimating tight output bounds of a network for a given input range can also help improve the explainability of neural networks and guide the training process of more robust neural networks.", "bibtex": "@inproceedings{NEURIPS2018_2ecd2bd9,\n author = {Wang, Shiqi and Pei, Kexin and Whitehouse, Justin and Yang, Junfeng and Jana, Suman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Formal Safety Analysis of Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2ecd2bd94734e5dd392d8678bc64cdab-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2ecd2bd94734e5dd392d8678bc64cdab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2ecd2bd94734e5dd392d8678bc64cdab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2ecd2bd94734e5dd392d8678bc64cdab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2ecd2bd94734e5dd392d8678bc64cdab-Reviews.html", "metareview": "", "pdf_size": 1250528, "gs_citation": 550, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17991981097688462983&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Columbia University, NYC, NY 10027, USA; Columbia University, NYC, NY 10027, USA; Columbia University, NYC, NY 10027, USA; Columbia University, NYC, NY 10027, USA; Columbia University, NYC, NY 10027, USA", "aff_domain": "cs.columbia.edu;cs.columbia.edu;cs.columbia.edu;cs.columbia.edu;cs.columbia.edu", "email": "cs.columbia.edu;cs.columbia.edu;cs.columbia.edu;cs.columbia.edu;cs.columbia.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2ecd2bd94734e5dd392d8678bc64cdab-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "New York City", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Gradient Computation for Structured Output Learning with Rational and Tropical Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11657", "id": "11657", "author_site": "Corinna Cortes, Vitaly Kuznetsov, Mehryar Mohri, Dmitry Storcheus, Scott Yang", "author": "Corinna Cortes; Vitaly Kuznetsov; Mehryar Mohri; Dmitry Storcheus; Scott Yang", "abstract": "Many structured prediction problems admit a natural loss function for evaluation such as the edit-distance or $n$-gram loss. However, existing learning algorithms are typically designed to optimize alternative objectives such as the cross-entropy. This is because a na\\\"{i}ve implementation of the natural loss functions often results in intractable gradient computations. In this paper, we design efficient gradient computation algorithms for two broad families of structured prediction loss functions: rational and tropical losses. 
These families include as special cases the $n$-gram loss, the edit-distance loss, and many other loss functions commonly used in natural language processing and computational biology tasks that are based on sequence similarity measures. Our algorithms make use of weighted automata and graph operations over appropriate semirings to design efficient solutions. They facilitate efficient gradient computation and hence enable one to train learning models such as neural networks with complex structured losses.", "bibtex": "@inproceedings{NEURIPS2018_f6d9e459,\n author = {Cortes, Corinna and Kuznetsov, Vitaly and Mohri, Mehryar and Storcheus, Dmitry and Yang, Scott},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Gradient Computation for Structured Output Learning with Rational and Tropical Losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f6d9e459b9fbf6dd26c4f7d621adec1d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f6d9e459b9fbf6dd26c4f7d621adec1d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f6d9e459b9fbf6dd26c4f7d621adec1d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f6d9e459b9fbf6dd26c4f7d621adec1d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f6d9e459b9fbf6dd26c4f7d621adec1d-Reviews.html", "metareview": "", "pdf_size": 565786, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5480846489284714269&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Google Research; Google Research; Courant Institute and Google Research; Courant Institute and Google Research; D. E. Shaw and Co. + Courant Institute", "aff_domain": "google.com;google.com;cims.nyu.edu;google.com;cims.nyu.edu", "email": "google.com;google.com;cims.nyu.edu;google.com;cims.nyu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f6d9e459b9fbf6dd26c4f7d621adec1d-Abstract.html", "aff_unique_index": "0;0;1;1;2+3", "aff_unique_norm": "Google;Courant Institute;D. E. Shaw and Co.;Courant Institute of Mathematical Sciences", "aff_unique_dep": "Google Research;Courant Institute;;Mathematical Sciences", "aff_unique_url": "https://research.google;https://courant.nyu.edu;https://www.deshaw.com;https://courant.nyu.edu", "aff_unique_abbr": "Google Research;Courant;DES;Courant", "aff_campus_unique_index": "0;0;", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Efficient High Dimensional Bayesian Optimization with Additivity and Quadrature Fourier Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11859", "id": "11859", "author_site": "Mojmir Mutny, Andreas Krause", "author": "Mojmir Mutny; Andreas Krause", "abstract": "We develop an efficient and provably no-regret Bayesian optimization (BO) algorithm for optimization of black-box functions in high dimensions. We assume a generalized additive model with possibly overlapping variable groups. 
When the groups do not overlap, we are able to provide the first provably no-regret \\emph{polynomial time} (in the number of evaluations of the acquisition function) algorithm for solving high dimensional BO. To make the optimization efficient and feasible, we introduce a novel deterministic Fourier Features approximation based on numerical integration with detailed analysis for the squared exponential kernel. The error of this approximation decreases \\emph{exponentially} with the number of features, and allows for a precise approximation of both posterior mean and variance. In addition, the kernel matrix inversion improves in its complexity from cubic to essentially linear in the number of data points measured in basic arithmetic operations.", "bibtex": "@inproceedings{NEURIPS2018_4e5046fc,\n author = {Mutny, Mojmir and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient High Dimensional Bayesian Optimization with Additivity and Quadrature Fourier Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e5046fc8d6a97d18a5f54beaed54dea-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e5046fc8d6a97d18a5f54beaed54dea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e5046fc8d6a97d18a5f54beaed54dea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e5046fc8d6a97d18a5f54beaed54dea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e5046fc8d6a97d18a5f54beaed54dea-Reviews.html", "metareview": "", "pdf_size": 2853554, "gs_citation": 171, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=287666787940504430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Computer Science, ETH Zurich, Switzerland; Department of Computer Science, ETH Zurich, Switzerland", "aff_domain": "inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e5046fc8d6a97d18a5f54beaed54dea-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Efficient Loss-Based Decoding on Graphs for Extreme Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11696", "id": "11696", "author_site": "Itay Evron, Edward Moroshko, Yacov Crammer", "author": "Itay Evron; Edward Moroshko; Koby Crammer", "abstract": "In extreme classification problems, learning algorithms are required to map instances to labels from an extremely large label set.\n We build on a recent extreme classification framework with logarithmic time and space (LTLS), and on a general approach for error correcting output coding (ECOC) with loss-based decoding, and introduce a flexible and efficient approach accompanied by theoretical bounds.\n Our framework employs output codes induced by graphs, for which we show how to perform efficient loss-based decoding to potentially improve accuracy.\n In addition, our framework offers a 
tradeoff between accuracy, model size and prediction time.\n We show how to find the sweet spot of this tradeoff using only the training data.\nOur experimental study demonstrates the validity of our assumptions and claims, and shows that our method is competitive with state-of-the-art algorithms.", "bibtex": "@inproceedings{NEURIPS2018_e7e69cdf,\n author = {Evron, Itay and Moroshko, Edward and Crammer, Koby},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Loss-Based Decoding on Graphs for Extreme Classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e7e69cdf28f8ce6b69b4e1853ee21bab-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e7e69cdf28f8ce6b69b4e1853ee21bab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e7e69cdf28f8ce6b69b4e1853ee21bab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e7e69cdf28f8ce6b69b4e1853ee21bab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e7e69cdf28f8ce6b69b4e1853ee21bab-Reviews.html", "metareview": "", "pdf_size": 794012, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17119928599826946784&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Computer Science Dept., The Technion, Israel; Electrical Engineering Dept., The Technion, Israel; Electrical Engineering Dept., The Technion, Israel", "aff_domain": "gmail.com;gmail.com;ee.technion.ac.il", "email": "gmail.com;gmail.com;ee.technion.ac.il", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e7e69cdf28f8ce6b69b4e1853ee21bab-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion", "aff_unique_dep": "Computer Science Dept.", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Efficient Neural Network Robustness Certification with General Activation Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11484", "id": "11484", "author_site": "Huan Zhang, Tsui-Wei Weng, Pin-Yu Chen, Cho-Jui Hsieh, Luca Daniel", "author": "Huan Zhang; Tsui-Wei Weng; Pin-Yu Chen; Cho-Jui Hsieh; Luca Daniel", "abstract": "Finding minimum distortion of adversarial examples and thus certifying robustness in neural networks classifiers is known to be a challenging problem. Nevertheless, recently it has been shown to be possible to give a non-trivial certified lower bound of minimum distortion, and some recent progress has been made towards this direction by exploiting the piece-wise linear nature of ReLU activations. However, a generic robustness certification for \\textit{general} activation functions still remains largely unexplored. To address this issue, in this paper we introduce CROWN, a general framework to certify robustness of neural networks with general activation functions. The novelty in our algorithm consists of bounding a given activation function with linear and quadratic functions, hence allowing it to tackle general activation functions including but not limited to the four popular choices: ReLU, tanh, sigmoid and arctan. 
In addition, we facilitate the search for a tighter certified lower bound by \\textit{adaptively} selecting appropriate surrogates for each neuron activation. Experimental results show that CROWN on ReLU networks can notably improve the certified lower bounds compared to the current state-of-the-art algorithm Fast-Lin, while having comparable computational efficiency. Furthermore, CROWN also demonstrates its effectiveness and flexibility on networks with general activation functions, including tanh, sigmoid and arctan.", "bibtex": "@inproceedings{NEURIPS2018_d04863f1,\n author = {Zhang, Huan and Weng, Tsui-Wei and Chen, Pin-Yu and Hsieh, Cho-Jui and Daniel, Luca},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Neural Network Robustness Certification with General Activation Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d04863f100d59b3eb688a11f95b0ae60-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d04863f100d59b3eb688a11f95b0ae60-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d04863f100d59b3eb688a11f95b0ae60-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d04863f100d59b3eb688a11f95b0ae60-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d04863f100d59b3eb688a11f95b0ae60-Reviews.html", "metareview": "", "pdf_size": 641749, "gs_citation": 990, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6606953928208344058&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "University of California, Los Angeles, Los Angeles CA 90095; Massachusetts Institute of Technology, Cambridge, MA 02139; MIT-IBM Watson AI Lab, IBM Research, Yorktown Heights, NY 10598 + Massachusetts Institute of Technology, Cambridge, MA 02139; University of California, Los Angeles, Los Angeles CA 90095; Massachusetts Institute of Technology, Cambridge, MA 02139", "aff_domain": "huan-zhang.com;mit.edu;ibm.com;cs.ucla.edu;mit.edu", "email": "huan-zhang.com;mit.edu;ibm.com;cs.ucla.edu;mit.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d04863f100d59b3eb688a11f95b0ae60-Abstract.html", "aff_unique_index": "0;1;2+1;0;1", "aff_unique_norm": "University of California, Los Angeles;Massachusetts Institute of Technology;IBM", "aff_unique_dep": ";;AI Lab", "aff_unique_url": "https://www.ucla.edu;https://www.mit.edu;", "aff_unique_abbr": "UCLA;MIT;MIT-IBM AI Lab", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Los Angeles;Cambridge;", "aff_country_unique_index": "0;0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Online Portfolio with Logarithmic Regret", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11788", "id": "11788", "author_site": "Haipeng Luo, Chen-Yu Wei, Kai Zheng", "author": "Haipeng Luo; Chen-Yu Wei; Kai Zheng", "abstract": "We study the decades-old problem of online portfolio management and propose the first algorithm with logarithmic regret that is not based on Cover's Universal Portfolio algorithm and admits much faster implementation. 
Specifically, Universal Portfolio enjoys the optimal regret $\mathcal{O}(N\ln T)$ for $N$ financial instruments over $T$ rounds, but requires log-concave sampling and has a large polynomial running time. Our algorithm, on the other hand, ensures a slightly larger but still logarithmic regret of $\mathcal{O}(N^2(\ln T)^4)$, and is based on the well-studied Online Mirror Descent framework with a novel regularizer that can be implemented via standard optimization methods in time $\mathcal{O}(TN^{2.5})$ per round. The regret of all other existing works is either polynomial in $T$ or has a potentially unbounded factor such as the inverse of the smallest price relative.", "bibtex": "@inproceedings{NEURIPS2018_91c77393,\n author = {Luo, Haipeng and Wei, Chen-Yu and Zheng, Kai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Online Portfolio with Logarithmic Regret},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/91c77393975889bd08f301c9e13a44b7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/91c77393975889bd08f301c9e13a44b7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/91c77393975889bd08f301c9e13a44b7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/91c77393975889bd08f301c9e13a44b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/91c77393975889bd08f301c9e13a44b7-Reviews.html", "metareview": "", "pdf_size": 297011, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17698385555404879626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Southern California; Department of Computer Science, University of Southern California; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University + Center for Data Science, Peking University, Beijing Institute of Big Data Research", "aff_domain": "usc.edu;usc.edu;pku.edu.cn", "email": "usc.edu;usc.edu;pku.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/91c77393975889bd08f301c9e13a44b7-Abstract.html", "aff_unique_index": "0;0;1+1", "aff_unique_norm": "University of Southern California;Peking University", "aff_unique_dep": "Department of Computer Science;School of EECS", "aff_unique_url": "https://www.usc.edu;http://www.pku.edu.cn", "aff_unique_abbr": "USC;Peking U", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Los Angeles;;Beijing", "aff_country_unique_index": "0;0;1+1", "aff_country_unique": "United States;China" }, { "title": "Efficient Projection onto the Perfect Phylogeny Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11407", "id": "11407", "author_site": "Bei Jia, Surjyendu Ray, Sam Safavi, Jos\u00e9 Bento", "author": "Bei Jia; Surjyendu Ray; Sam Safavi; Jos\u00e9 Bento", "abstract": "Several algorithms build on the perfect phylogeny model to infer evolutionary trees. This problem is particularly hard when evolutionary trees are inferred from the fraction of genomes that have mutations in different positions, across different samples. Existing algorithms might do extensive searches over the space of possible trees.
At the center of these algorithms is a projection problem that assigns a fitness cost to phylogenetic trees. In order to perform a wide search over the space of trees, it is critical to solve this projection problem fast. In this paper, we use Moreau's decomposition for proximal operators, and a tree reduction scheme, to develop a new algorithm to compute this projection. Our algorithm terminates with an exact solution in a finite number of steps, and is extremely fast. In particular, it can search over all evolutionary trees with fewer than 11 nodes (more than 2 billion trees), a size relevant for several biological problems, in about 2 hours.", "bibtex": "@inproceedings{NEURIPS2018_d198bd73,\n author = {Jia, Bei and Ray, Surjyendu and Safavi, Sam and Bento, Jos\'{e}},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Projection onto the Perfect Phylogeny Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d198bd736a97e7cecfdf8f4f2027ef80-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d198bd736a97e7cecfdf8f4f2027ef80-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d198bd736a97e7cecfdf8f4f2027ef80-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d198bd736a97e7cecfdf8f4f2027ef80-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d198bd736a97e7cecfdf8f4f2027ef80-Reviews.html", "metareview": "", "pdf_size": 663981, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5821955687711188887&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Boston College\u2217; Boston College; Boston College; Boston College", "aff_domain": "bc.edu;bc.edu;bc.edu;bc.edu", "email": "bc.edu;bc.edu;bc.edu;bc.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d198bd736a97e7cecfdf8f4f2027ef80-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Boston College", "aff_unique_dep": "", "aff_unique_url": "https://www.bostoncollege.edu", "aff_unique_abbr": "BC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Stochastic Gradient Hard Thresholding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11211", "id": "11211", "author_site": "Pan Zhou, Xiaotong Yuan, Jiashi Feng", "author": "Pan Zhou; Xiaotong Yuan; Jiashi Feng", "abstract": "Stochastic gradient hard thresholding methods have recently been shown to work favorably in solving large-scale empirical risk minimization problems under sparsity or rank constraint. Despite the improved iteration complexity over full gradient methods, the gradient evaluation and hard thresholding complexity of the existing stochastic algorithms usually scales linearly with data size, which could still be expensive when data is huge and the hard thresholding step could be as expensive as singular value decomposition in rank-constrained problems.
To address these deficiencies, we propose an efficient hybrid stochastic gradient hard thresholding (HSG-HT) method that can be provably shown to have sample-size-independent gradient evaluation and hard thresholding complexity bounds. Specifically, we prove that the stochastic gradient evaluation complexity of HSG-HT scales linearly with the inverse of the sub-optimality and its hard thresholding complexity scales logarithmically. By applying the heavy ball acceleration technique, we further propose an accelerated variant of HSG-HT which can be shown to have improved factor dependence on the restricted condition number. Numerical results confirm our theoretical analysis and demonstrate the computational efficiency of the proposed methods.", "bibtex": "@inproceedings{NEURIPS2018_ec5aa0b7,\n author = {Zhou, Pan and Yuan, Xiaotong and Feng, Jiashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Stochastic Gradient Hard Thresholding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ec5aa0b7846082a2415f0902f0da88f2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ec5aa0b7846082a2415f0902f0da88f2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ec5aa0b7846082a2415f0902f0da88f2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ec5aa0b7846082a2415f0902f0da88f2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ec5aa0b7846082a2415f0902f0da88f2-Reviews.html", "metareview": "", "pdf_size": 473940, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8866631150245266849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Learning &Vision Lab, National University of Singapore, Singapore; B-DAT Lab, Nanjing University of Information Science &Technology, Nanjing, China; Learning &Vision Lab, National University of Singapore, Singapore", "aff_domain": "u.nus.edu;nuist.edu.cn;nus.edu.sg", "email": "u.nus.edu;nuist.edu.cn;nus.edu.sg", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ec5aa0b7846082a2415f0902f0da88f2-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Nanjing University of Information Science & Technology", "aff_unique_dep": "Learning &Vision Lab;B-DAT Lab", "aff_unique_url": "https://www.nus.edu.sg;", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Singapore;Nanjing", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Efficient inference for time-varying behavior during learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11555", "id": "11555", "author_site": "Nicholas Roy, Ji Hyun Bak, Athena Akrami, Carlos Brody, Jonathan Pillow", "author": "Nicholas A. Roy; Ji Hyun Bak; Athena Akrami; Carlos Brody; Jonathan W Pillow", "abstract": "The process of learning new behaviors over time is a problem of great interest in both neuroscience and artificial intelligence.
However, most standard analyses of animal training data either treat behavior as fixed or track only coarse performance statistics (e.g., accuracy, bias), providing limited insight into the evolution of the policies governing behavior. To overcome these limitations, we propose a dynamic psychophysical model that efficiently tracks trial-to-trial changes in behavior over the course of training. Our model consists of a dynamic logistic regression model, parametrized by a set of time-varying weights that express dependence on sensory stimuli as well as task-irrelevant covariates, such as stimulus, choice, and answer history. Our implementation scales to large behavioral datasets, allowing us to infer 500K parameters (e.g. 10 weights over 50K trials) in minutes on a desktop computer. We optimize hyperparameters governing how rapidly each weight evolves over time using the decoupled Laplace approximation, an efficient method for maximizing marginal likelihood in non-conjugate models. To illustrate performance, we apply our method to psychophysical data from both rats and human subjects learning a delayed sensory discrimination task. The model successfully tracks the psychophysical weights of rats over the course of training, capturing day-to-day and trial-to-trial fluctuations that underlie changes in performance, choice bias, and dependencies on task history. Finally, we investigate why rats frequently make mistakes on easy trials, and suggest that apparent lapses can be explained by sub-optimal weighting of known task covariates.", "bibtex": "@inproceedings{NEURIPS2018_cdcb2f5c,\n author = {Roy, Nicholas A. and Bak, Ji Hyun and Akrami, Athena and Brody, Carlos and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient inference for time-varying behavior during learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cdcb2f5c7b071143529ef7f2705dfbc4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cdcb2f5c7b071143529ef7f2705dfbc4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cdcb2f5c7b071143529ef7f2705dfbc4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cdcb2f5c7b071143529ef7f2705dfbc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cdcb2f5c7b071143529ef7f2705dfbc4-Reviews.html", "metareview": "", "pdf_size": 4463281, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1964377870730614821&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Princeton Neuroscience Institute, Princeton University; Korea Institute for Advanced Study; Princeton Neuroscience Institute, Princeton University + Howard Hughes Medical Institute + Sainsbury Wellcome Centre, UCL; Princeton Neuroscience Institute, Princeton University + Dept. of Molecular Biology, Princeton University; Princeton Neuroscience Institute, Princeton University + Dept. 
of Psychology, Princeton University", "aff_domain": "princeton.edu;kias.re.kr;ucl.ac.uk;princeton.edu;princeton.edu", "email": "princeton.edu;kias.re.kr;ucl.ac.uk;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cdcb2f5c7b071143529ef7f2705dfbc4-Abstract.html", "aff_unique_index": "0;1;0+2+3;0+0;0+0", "aff_unique_norm": "Princeton University;Korea Institute for Advanced Study;Howard Hughes Medical Institute;University College London", "aff_unique_dep": "Princeton Neuroscience Institute;;;Sainsbury Wellcome Centre", "aff_unique_url": "https://www.princeton.edu;http://www.kaist.edu;https://www.hhmi.org;https://www.ucl.ac.uk", "aff_unique_abbr": "Princeton;KIAS;HHMI;UCL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Princeton;", "aff_country_unique_index": "0;1;0+0+2;0+0;0+0", "aff_country_unique": "United States;South Korea;United Kingdom" }, { "title": "Efficient nonmyopic batch active search", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11129", "id": "11129", "author_site": "Shali Jiang, Gustavo Malkomes, Matthew Abbott, Benjamin Moseley, Roman Garnett", "author": "Shali Jiang; Gustavo Malkomes; Matthew Abbott; Benjamin Moseley; Roman Garnett", "abstract": "Active search is a learning paradigm for actively identifying as many members of a given class as possible. A critical target scenario is high-throughput screening for scientific discovery, such as drug or materials discovery. In these settings, specialized instruments can often evaluate \\emph{multiple} points simultaneously; however, all existing work on active search focuses on sequential acquisition. We bridge this gap, addressing batch active search from both the theoretical and practical perspective. We first derive the Bayesian optimal policy for this problem, then prove a lower bound on the performance gap between sequential and batch optimal policies: the ``cost of parallelization.'' We also propose novel, efficient batch policies inspired by state-of-the-art sequential policies, and develop an aggressive pruning technique that can dramatically speed up computation. We conduct thorough experiments on data from three application domains: a citation network, material science, and drug discovery, testing all proposed policies (14 total) with a wide range of batch sizes. Our results demonstrate that the empirical performance gap matches our theoretical bound, that nonmyopic policies usually significantly outperform myopic alternatives, and that diversity is an important consideration for batch policy design.", "bibtex": "@inproceedings{NEURIPS2018_a7aeed74,\n author = {Jiang, Shali and Malkomes, Gustavo and Abbott, Matthew and Moseley, Benjamin and Garnett, Roman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient nonmyopic batch active search},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a7aeed74714116f3b292a982238f83d2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a7aeed74714116f3b292a982238f83d2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a7aeed74714116f3b292a982238f83d2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a7aeed74714116f3b292a982238f83d2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a7aeed74714116f3b292a982238f83d2-Reviews.html", "metareview": "", "pdf_size": 322455, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6897489142858783657&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "CSE, WUSTL; CSE, WUSTL; CSE, WUSTL; Tepper School of Business, CMU + Relational AI; CSE, WUSTL", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;andrew.cmu.edu;wustl.edu", "email": "wustl.edu;wustl.edu;wustl.edu;andrew.cmu.edu;wustl.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a7aeed74714116f3b292a982238f83d2-Abstract.html", "aff_unique_index": "0;0;0;1+2;0", "aff_unique_norm": "Washington University in St. Louis;Carnegie Mellon University;Relational AI", "aff_unique_dep": "Department of Computer Science and Engineering;Tepper School of Business;", "aff_unique_url": "https://wustl.edu;https://www.cmu.edu;https://www.relationalai.com", "aff_unique_abbr": "WUSTL;CMU;Relational AI", "aff_campus_unique_index": "0;0;0;;0", "aff_campus_unique": "St. Louis;", "aff_country_unique_index": "0;0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Efficient online algorithms for fast-rate regret bounds under sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11677", "id": "11677", "author_site": "Pierre Gaillard, Olivier Wintenberger", "author": "Pierre Gaillard; Olivier Wintenberger", "abstract": "We consider the problem of online convex optimization in two different settings: arbitrary and i.i.d. sequence of convex loss functions. In both settings, we provide efficient algorithms whose cumulative excess risks are controlled with fast-rate sparse bounds. \nFirst, the excess risks bounds depend on the sparsity of the objective rather than on the dimension of the parameters space. Second, their rates are faster than the slow-rate $1/\\sqrt{T}$ under additional convexity assumptions on the loss functions. In the adversarial setting, we develop an algorithm BOA+ whose cumulative excess risks is controlled by several bounds with different trade-offs between sparsity and rate for strongly convex loss functions. In the i.i.d. setting under the \u0141ojasiewicz's assumption, we establish new risk bounds that are sparse with a rate adaptive to the convexity of the risk (ranging from a rate $1/\\sqrt{T}$ for general convex risk to $1/T$ for strongly convex risk). These results generalize previous works on sparse online learning under weak assumptions on the risk.", "bibtex": "@inproceedings{NEURIPS2018_0a348ede,\n author = {Gaillard, Pierre and Wintenberger, Olivier},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient online algorithms for fast-rate regret bounds under sparsity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0a348ede8ac3768875037baca5de6e26-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0a348ede8ac3768875037baca5de6e26-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0a348ede8ac3768875037baca5de6e26-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0a348ede8ac3768875037baca5de6e26-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0a348ede8ac3768875037baca5de6e26-Reviews.html", "metareview": "", "pdf_size": 787029, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5522459852606253426&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "INRIA, ENS, PSL Research University; Sorbonne Universit\u00e9, CNRS, LPSM", "aff_domain": "inria.fr;upmc.fr", "email": "inria.fr;upmc.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0a348ede8ac3768875037baca5de6e26-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "INRIA;Sorbonne Universit\u00e9", "aff_unique_dep": ";CNRS, LPSM", "aff_unique_url": "https://www.inria.fr;https://www.sorbonne-universite.fr", "aff_unique_abbr": "INRIA;Sorbonne U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Embedding Logical Queries on Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11215", "id": "11215", "author_site": "Will Hamilton, Payal Bajaj, Marinka Zitnik, Dan Jurafsky, Jure Leskovec", "author": "Will Hamilton; Payal Bajaj; Marinka Zitnik; Dan Jurafsky; Jure Leskovec", "abstract": "Learning low-dimensional embeddings of knowledge graphs is a powerful approach used to predict unobserved or missing edges between entities. However, an open challenge in this area is developing techniques that can go beyond simple edge prediction and handle more complex logical queries, which might involve multiple unobserved edges, entities, and variables. For instance, given an incomplete biological knowledge graph, we might want to predict \"what drugs are likely to target proteins involved with both diseases X and Y?\" -- a query that requires reasoning about all possible proteins that might interact with diseases X and Y. Here we introduce a framework to efficiently make predictions about conjunctive logical queries -- a flexible but tractable subset of first-order logic -- on incomplete knowledge graphs. In our approach, we embed graph nodes in a low-dimensional space and represent logical operators as learned geometric operations (e.g., translation, rotation) in this embedding space. By performing logical operations within a low-dimensional embedding space, our approach achieves a time complexity that is linear in the number of query variables, compared to the exponential complexity required by a naive enumeration-based approach.
We demonstrate the utility of this framework in two application studies on real-world datasets with millions of relations: predicting logical relationships in a network of drug-gene-disease interactions and in a graph-based representation of social interactions derived from a popular web forum.", "bibtex": "@inproceedings{NEURIPS2018_ef50c335,\n author = {Hamilton, Will and Bajaj, Payal and Zitnik, Marinka and Jurafsky, Dan and Leskovec, Jure},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Embedding Logical Queries on Knowledge Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ef50c335cca9f340bde656363ebd02fd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ef50c335cca9f340bde656363ebd02fd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ef50c335cca9f340bde656363ebd02fd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ef50c335cca9f340bde656363ebd02fd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ef50c335cca9f340bde656363ebd02fd-Reviews.html", "metareview": "", "pdf_size": 1274980, "gs_citation": 381, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9948805019620970484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Stanford University, Department of Computer Science; Stanford University, Department of Computer Science; Stanford University, Department of Computer Science; Stanford University, Department of Linguistics; Stanford University, Department of Computer Science", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ef50c335cca9f340bde656363ebd02fd-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Empirical Risk Minimization Under Fairness Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11286", "id": "11286", "author_site": "Michele Donini, Luca Oneto, Shai Ben-David, John Shawe-Taylor, Massimiliano Pontil", "author": "Michele Donini; Luca Oneto; Shai Ben-David; John S Shawe-Taylor; Massimiliano Pontil", "abstract": "We address the problem of algorithmic fairness: ensuring that sensitive information does not unfairly influence the outcome of a classifier. We present an approach based on empirical risk minimization, which incorporates a fairness constraint into the learning problem. It encourages the conditional risk of the learned classifier to be approximately constant with respect to the sensitive variable. We derive both risk and fairness bounds that support the statistical consistency of our methodology. We specify our approach to kernel methods and observe that the fairness requirement implies an orthogonality constraint which can be easily added to these methods. 
We further observe that for linear models the constraint translates into a simple data preprocessing step. Experiments indicate that the method is empirically effective and performs favorably against state-of-the-art approaches.", "bibtex": "@inproceedings{NEURIPS2018_83cdcec0,\n author = {Donini, Michele and Oneto, Luca and Ben-David, Shai and Shawe-Taylor, John S and Pontil, Massimiliano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Empirical Risk Minimization Under Fairness Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/83cdcec08fbf90370fcf53bdd56604ff-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/83cdcec08fbf90370fcf53bdd56604ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/83cdcec08fbf90370fcf53bdd56604ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/83cdcec08fbf90370fcf53bdd56604ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/83cdcec08fbf90370fcf53bdd56604ff-Reviews.html", "metareview": "", "pdf_size": 1017297, "gs_citation": 578, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5746250113194301793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Istituto Italiano di Tecnologia (Italy); University of Genoa (Italy); University of Waterloo (Canada); University College London (UK); Istituto Italiano di Tecnologia (Italy)+University College London (UK)", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/83cdcec08fbf90370fcf53bdd56604ff-Abstract.html", "aff_unique_index": "0;1;2;3;0+3", "aff_unique_norm": "Istituto Italiano di Tecnologia;University of Genoa;University of Waterloo;University College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.iit.it;https://www.unige.it;https://uwaterloo.ca;https://www.ucl.ac.uk", "aff_unique_abbr": "IIT;UniGe;UW;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0+2", "aff_country_unique": "Italy;Canada;United Kingdom" }, { "title": "Empirical Risk Minimization in Non-interactive Local Differential Privacy Revisited", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11117", "id": "11117", "author_site": "Di Wang, Marco Gaboardi, Jinhui Xu", "author": "Di Wang; Marco Gaboardi; Jinhui Xu", "abstract": "In this paper, we revisit the Empirical Risk Minimization problem in the non-interactive local model of differential privacy. In the case of constant or low dimensions ($p\\ll n$), we first show that if the loss function is $(\\infty, T)$-smooth, we can avoid a dependence of the sample complexity, to achieve error $\\alpha$, on the exponential of the dimensionality $p$ with base $1/\\alpha$ ({\\em i.e.,} $\\alpha^{-p}$),\n which answers a question in \\cite{smith2017interaction}. Our approach is based on polynomial approximation. Then, we propose player-efficient algorithms with $1$-bit communication complexity and $O(1)$ computation cost for each player. The error bound is asymptotically the same as the original one. With some additional assumptions, we also give an efficient algorithm for the server. 
\n In the case of high dimensions ($n\\ll p$), we show that if the loss function is a convex generalized linear function, the error can be bounded by using the Gaussian width of the constrained set, instead of $p$, which improves on the one in \n \\cite{smith2017interaction}.", "bibtex": "@inproceedings{NEURIPS2018_13f320e7,\n author = {Wang, Di and Gaboardi, Marco and Xu, Jinhui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Empirical Risk Minimization in Non-interactive Local Differential Privacy Revisited},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/13f320e7b5ead1024ac95c3b208610db-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/13f320e7b5ead1024ac95c3b208610db-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/13f320e7b5ead1024ac95c3b208610db-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/13f320e7b5ead1024ac95c3b208610db-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/13f320e7b5ead1024ac95c3b208610db-Reviews.html", "metareview": "", "pdf_size": 394433, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3444994658041705833&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/13f320e7b5ead1024ac95c3b208610db-Abstract.html" }, { "title": "End-to-End Differentiable Physics for Learning and Control", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11691", "id": "11691", "author_site": "Filipe de Avila Belbute Peres, Kevin Smith, Kelsey Allen, Josh Tenenbaum, J. Zico Kolter", "author": "Filipe de Avila Belbute-Peres; Kevin Smith; Kelsey Allen; Josh Tenenbaum; J. Zico Kolter", "abstract": "We present a differentiable physics engine that can be integrated as a module in deep neural networks for end-to-end learning. As a result, structured physics knowledge can be embedded into larger systems, allowing them, for example, to match observations by performing precise simulations, while achieving high sample efficiency. Specifically, in this paper we demonstrate how to perform backpropagation analytically through a physical simulator defined via a linear complementarity problem. Unlike traditional finite difference methods, such gradients can be computed analytically, which allows for greater flexibility of the engine. Through experiments in diverse domains, we highlight the system's ability to learn physical parameters from data, efficiently match and simulate observed visual behavior, and readily enable control via gradient-based planning methods. Code for the engine and experiments is included with the paper.", "bibtex": "@inproceedings{NEURIPS2018_842424a1,\n author = {de Avila Belbute-Peres, Filipe and Smith, Kevin and Allen, Kelsey and Tenenbaum, Josh and Kolter, J. Zico},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {End-to-End Differentiable Physics for Learning and Control},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/842424a1d0595b76ec4fa03c46e8d755-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/842424a1d0595b76ec4fa03c46e8d755-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/842424a1d0595b76ec4fa03c46e8d755-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/842424a1d0595b76ec4fa03c46e8d755-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/842424a1d0595b76ec4fa03c46e8d755-Reviews.html", "metareview": "", "pdf_size": 813229, "gs_citation": 509, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14112447268670731682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "School of Computer Science, Carnegie Mellon University; Brain and Cognitive Sciences, Massachusetts Institute of Technology; Brain and Cognitive Sciences, Massachusetts Institute of Technology; Brain and Cognitive Sciences, Massachusetts Institute of Technology; School of Computer Science, Carnegie Mellon University + Bosch Center for Artificial Intelligence", "aff_domain": "cs.cmu.edu;mit.edu;mit.edu;mit.edu;cs.cmu.edu", "email": "cs.cmu.edu;mit.edu;mit.edu;mit.edu;cs.cmu.edu", "github": "https://github.com/locuslab/lcp-physics", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/842424a1d0595b76ec4fa03c46e8d755-Abstract.html", "aff_unique_index": "0;1;1;1;0+2", "aff_unique_norm": "Carnegie Mellon University;Massachusetts Institute of Technology;Bosch Center for Artificial Intelligence", "aff_unique_dep": "School of Computer Science;Brain and Cognitive Sciences;Center for Artificial Intelligence", "aff_unique_url": "https://www.cmu.edu;https://web.mit.edu;https://www.bosch-ai.com", "aff_unique_abbr": "CMU;MIT;BCAI", "aff_campus_unique_index": "0;1;1;1;0", "aff_campus_unique": "Pittsburgh;Cambridge;", "aff_country_unique_index": "0;0;0;0;0+1", "aff_country_unique": "United States;Germany" }, { "title": "End-to-end Symmetry Preserving Inter-atomic Potential Energy Model for Finite and Extended Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11438", "id": "11438", "author_site": "Linfeng Zhang, Jiequn Han, Han Wang, Wissam Saidi, Roberto Car, Weinan E", "author": "Linfeng Zhang; Jiequn Han; Han Wang; Wissam Saidi; Roberto Car; Weinan E", "abstract": "Machine learning models are changing the paradigm of molecular modeling, which is a fundamental tool for material science, chemistry, and computational biology. Of particular interest is the inter-atomic potential energy surface (PES). Here we develop Deep Potential - Smooth Edition (DeepPot-SE), an end-to-end machine learning-based PES model, which is able to efficiently represent the PES for a wide variety of systems with the accuracy of ab initio quantum mechanics models. By construction, DeepPot-SE is extensive and continuously differentiable, scales linearly with system size, and preserves all the natural symmetries of the system. 
Further, we show that DeepPot-SE describes finite and extended systems including organic molecules, metals, semiconductors, and insulators with high fidelity.", "bibtex": "@inproceedings{NEURIPS2018_e2ad76f2,\n author = {Zhang, Linfeng and Han, Jiequn and Wang, Han and Saidi, Wissam and Car, Roberto and E, Weinan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {End-to-end Symmetry Preserving Inter-atomic Potential Energy Model for Finite and Extended Systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e2ad76f2326fbc6b56a45a56c59fafdb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e2ad76f2326fbc6b56a45a56c59fafdb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e2ad76f2326fbc6b56a45a56c59fafdb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e2ad76f2326fbc6b56a45a56c59fafdb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e2ad76f2326fbc6b56a45a56c59fafdb-Reviews.html", "metareview": "", "pdf_size": 1994588, "gs_citation": 622, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4009423108945551834&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e2ad76f2326fbc6b56a45a56c59fafdb-Abstract.html" }, { "title": "Enhancing the Accuracy and Fairness of Human Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11190", "id": "11190", "author_site": "Isabel Valera, Adish Singla, Manuel Gomez Rodriguez", "author": "Isabel Valera; Adish Singla; Manuel Gomez Rodriguez", "abstract": "Societies often rely on human experts to take a wide variety of decisions affecting their members, from jail-or-release decisions taken by judges and stop-and-frisk decisions taken by police officers to accept-or-reject decisions taken by academics. In this context, each decision is taken by an expert who is typically chosen uniformly at random from a pool of experts. However, these decisions may be imperfect due to limited experience, implicit biases, or faulty probabilistic reasoning. Can we improve the accuracy and fairness of the overall decision making process by optimizing the assignment between experts and decisions?", "bibtex": "@inproceedings{NEURIPS2018_0a113ef6,\n author = {Valera, Isabel and Singla, Adish and Gomez Rodriguez, Manuel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Enhancing the Accuracy and Fairness of Human Decision Making},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0a113ef6b61820daa5611c870ed8d5ee-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0a113ef6b61820daa5611c870ed8d5ee-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0a113ef6b61820daa5611c870ed8d5ee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0a113ef6b61820daa5611c870ed8d5ee-Reviews.html", "metareview": "", "pdf_size": 389406, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9266559070813035929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "MPI for Intelligent Systems; MPI-SWS; MPI-SWS", "aff_domain": "tue.mpg.de;mpi-sws.org;mpi-sws.org", "email": "tue.mpg.de;mpi-sws.org;mpi-sws.org", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0a113ef6b61820daa5611c870ed8d5ee-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.mpi-sws.org", "aff_unique_abbr": "MPI-IS;MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Entropy Rate Estimation for Markov Chains with Large State Space", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11929", "id": "11929", "author_site": "Yanjun Han, Jiantao Jiao, Chuan-Zheng Lee, Tsachy Weissman, Yihong Wu, Tiancheng Yu", "author": "Yanjun Han; Jiantao Jiao; Chuan-Zheng Lee; Tsachy Weissman; Yihong Wu; Tiancheng Yu", "abstract": "Entropy estimation is one of the prototypical problems in distribution property testing. To consistently estimate the Shannon entropy of a distribution on $S$ elements with independent samples, the optimal sample complexity scales sublinearly with $S$ as $\\Theta(\\frac{S}{\\log S})$ as shown by Valiant and Valiant \\cite{Valiant--Valiant2011}. Extending the theory and algorithms for entropy estimation to dependent data, this paper considers the problem of estimating the entropy rate of a stationary reversible Markov chain with $S$ states from a sample path of $n$ observations. We show that\n\\begin{itemize}\n\t\\item Provided the Markov chain mixes not too slowly, \\textit{i.e.}, the relaxation time is at most $O(\\frac{S}{\\ln^3 S})$, consistent estimation is achievable when $n \\gg \\frac{S^2}{\\log S}$.\n\t\\item Provided the Markov chain has some slight dependency, \\textit{i.e.}, the relaxation time is at least $1+\\Omega(\\frac{\\ln^2 S}{\\sqrt{S}})$, consistent estimation is impossible when $n \\lesssim \\frac{S^2}{\\log S}$.\n\\end{itemize}\nUnder both assumptions, the optimal estimation accuracy is shown to be $\\Theta(\\frac{S^2}{n \\log S})$. In comparison, the empirical entropy rate requires at least $\\Omega(S^2)$ samples to be consistent, even when the Markov chain is memoryless. 
In addition to synthetic experiments, we also apply the estimators that achieve the optimal sample complexity to estimate the entropy rate of the English language in the Penn Treebank and the Google One Billion Words corpora, which provides a natural benchmark for language modeling and relates it directly to the widely used perplexity measure.", "bibtex": "@inproceedings{NEURIPS2018_99cad265,\n author = {Han, Yanjun and Jiao, Jiantao and Lee, Chuan-Zheng and Weissman, Tsachy and Wu, Yihong and Yu, Tiancheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Entropy Rate Estimation for Markov Chains with Large State Space},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/99cad265a1768cc2dd013f0e740300ae-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/99cad265a1768cc2dd013f0e740300ae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/99cad265a1768cc2dd013f0e740300ae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/99cad265a1768cc2dd013f0e740300ae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/99cad265a1768cc2dd013f0e740300ae-Reviews.html", "metareview": "", "pdf_size": 414050, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5149993878731964631&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Electrical Engineering, Stanford University; Department of Electrical Engineering and Computer Sciences, University of California, Berkeley; Department of Electrical Engineering, Stanford University; Department of Electrical Engineering, Stanford University; Department of Statistics and Data Science, Yale University; Department of Electronic Engineering, Tsinghua University", "aff_domain": "stanford.edu;berkeley.edu;stanford.edu;stanford.edu;yale.edu;foxmail.com", "email": "stanford.edu;berkeley.edu;stanford.edu;stanford.edu;yale.edu;foxmail.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/99cad265a1768cc2dd013f0e740300ae-Abstract.html", "aff_unique_index": "0;1;0;0;2;3", "aff_unique_norm": "Stanford University;University of California, Berkeley;Yale University;Tsinghua University", "aff_unique_dep": "Department of Electrical Engineering;Department of Electrical Engineering and Computer Sciences;Department of Statistics and Data Science;Department of Electronic Engineering", "aff_unique_url": "https://www.stanford.edu;https://www.berkeley.edu;https://www.yale.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Stanford;UC Berkeley;Yale;THU", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Stanford;Berkeley;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Entropy and mutual information in models of deep neural networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11195", "id": "11195", "author_site": "Marylou Gabri\u00e9, Andre Manoel, Cl\u00e9ment Luneau, jean barbier, Nicolas Macris, Florent Krzakala, Lenka Zdeborov\u00e1", "author": "Marylou Gabri\u00e9; Andre Manoel; Cl\u00e9ment Luneau; jean barbier; Nicolas Macris; Florent Krzakala; Lenka Zdeborov\u00e1", "abstract": "We examine a class of stochastic deep learning 
models with a tractable method to compute information-theoretic quantities. Our contributions are three-fold: (i) We show how entropies and mutual informations can be derived from heuristic statistical physics methods, under the assumption that weight matrices are independent and orthogonally-invariant. (ii) We extend particular cases in which this result is known to be rigorously exact by providing a proof for two-layer networks with Gaussian random weights, using the recently introduced adaptive interpolation method. (iii) We propose an experimental framework with generative models of synthetic datasets, on which we train deep neural networks with a weight constraint designed so that the assumption in (i) is verified during learning. We study the behavior of entropies and mutual information throughout learning and conclude that, in the proposed setting, the relationship between compression and generalization remains elusive.", "bibtex": "@inproceedings{NEURIPS2018_6d0f8463,\n author = {Gabri\\'{e}, Marylou and Manoel, Andre and Luneau, Cl\\'{e}ment and barbier, jean and Macris, Nicolas and Krzakala, Florent and Zdeborov\\'{a}, Lenka},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Entropy and mutual information in models of deep neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6d0f846348a856321729a2f36734d1a7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6d0f846348a856321729a2f36734d1a7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6d0f846348a856321729a2f36734d1a7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6d0f846348a856321729a2f36734d1a7-Reviews.html", "metareview": "", "pdf_size": 800924, "gs_citation": 232, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12349930318293918996&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 21, "aff": "Laboratoire de Physique Statistique, \u00c9cole Normale Sup\u00e9rieure, PSL University; Parietal Team, INRIA, CEA, Universit\u00e9 Paris-Saclay & Owkin Inc., New York; Institut de Physique Th\u00e9orique, CEA, CNRS, Universit\u00e9 Paris-Saclay; Laboratoire de Th\u00e9orie des Communications, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; International Center for Theoretical Physics, Trieste, Italy; Department of Mathematics, Duke University, Durham NC; Sorbonne Universit\u00e9s & LightOn Inc., Paris", "aff_domain": "ens.fr; ; ; ; ; ; ", "email": "ens.fr; ; ; ; ; ; ", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6d0f846348a856321729a2f36734d1a7-Abstract.html", "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "\u00c9cole Normale Sup\u00e9rieure;INRIA;Universit\u00e9 Paris-Saclay;EPFL;International Center for Theoretical Physics;Duke University;Sorbonne Universit\u00e9s", "aff_unique_dep": "Laboratoire de Physique Statistique;Parietal Team;Institut de Physique Th\u00e9orique;Laboratoire de Th\u00e9orie des Communications;;Department of Mathematics;", "aff_unique_url": "https://www.ens.fr;https://www.inria.fr;https://www.universite-paris-saclay.fr;https://www.epfl.ch;https://www.ictp.it/;https://www.duke.edu;https://www.sorbonne-universite.fr", "aff_unique_abbr": "ENS;INRIA;UPS;EPFL;ICTP;Duke;Sorbonne", 
"aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Lausanne;Durham;Paris", "aff_country_unique_index": "0;0;0;1;2;3;0", "aff_country_unique": "France;Switzerland;Italy;United States" }, { "title": "Equality of Opportunity in Classification: A Causal Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11367", "id": "11367", "author_site": "Junzhe Zhang, Elias Bareinboim", "author": "Junzhe Zhang; Elias Bareinboim", "abstract": "The Equalized Odds (for short, EO) is one of the most popular measures of discrimination used in the supervised learning setting. It ascertains fairness through the balance of the misclassification rates (false positive and negative) across the protected groups -- e.g., in the context of law enforcement, an African-American defendant who would not commit a future crime will have an equal opportunity of being released, compared to a non-recidivating Caucasian defendant. Despite this noble goal, it has been acknowledged in the literature that statistical tests based on the EO are oblivious to the underlying causal mechanisms that generated the disparity in the first place (Hardt et al. 2016). This leads to a critical disconnect between statistical measures readable from the data and the meaning of discrimination in the legal system, where compelling evidence that the observed disparity is tied to a specific causal process deemed unfair by society is required to characterize discrimination. The goal of this paper is to develop a principled approach to connect the statistical disparities characterized by the EO and the underlying, elusive, and frequently unobserved, causal mechanisms that generated such inequality. We start by introducing a new family of counterfactual measures that allows one to explain the misclassification disparities in terms of the underlying mechanisms in an arbitrary, non-parametric structural causal model. This will, in turn, allow legal and data analysts to interpret currently deployed classifiers through causal lens, linking the statistical disparities found in the data to the corresponding causal processes. Leveraging the new family of counterfactual measures, we develop a learning procedure to construct a classifier that is statistically efficient, interpretable, and compatible with the basic human intuition of fairness. We demonstrate our results through experiments in both real (COMPAS) and synthetic datasets.", "bibtex": "@inproceedings{NEURIPS2018_ff1418e8,\n author = {Zhang, Junzhe and Bareinboim, Elias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Equality of Opportunity in Classification: A Causal Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ff1418e8cc993fe8abcfe3ce2003e5c5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ff1418e8cc993fe8abcfe3ce2003e5c5-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ff1418e8cc993fe8abcfe3ce2003e5c5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ff1418e8cc993fe8abcfe3ce2003e5c5-Reviews.html", "metareview": "", "pdf_size": 461569, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4391768513414723407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Purdue University, USA; Purdue University, USA", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ff1418e8cc993fe8abcfe3ce2003e5c5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Escaping Saddle Points in Constrained Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11363", "id": "11363", "author_site": "Aryan Mokhtari, Asuman Ozdaglar, Ali Jadbabaie", "author": "Aryan Mokhtari; Asuman Ozdaglar; Ali Jadbabaie", "abstract": "In this paper, we study the problem of escaping from saddle points in smooth\nnonconvex optimization problems subject to a convex set $\\mathcal{C}$. We propose a generic framework that yields convergence to a second-order stationary point of the problem, if the convex set $\\mathcal{C}$ is simple for a quadratic objective function. Specifically, our results hold if one can find a $\\rho$-approximate solution of a quadratic program subject to $\\mathcal{C}$ in polynomial time, where $\\rho<1$ is a positive constant that depends on the structure of the set $\\mathcal{C}$. Under this condition, we show that the sequence of iterates generated by the proposed framework reaches an $(\\epsilon,\\gamma)$-second order stationary point (SOSP) in at most $\\mathcal{O}(\\max\\{\\epsilon^{-2},\\rho^{-3}\\gamma^{-3}\\})$ iterations. We further characterize the overall complexity of reaching an SOSP when the convex set $\\mathcal{C}$ can be written as a set of quadratic constraints and the objective function Hessian\nhas a specific structure over the convex set $\\mathcal{C}$. Finally, we extend our results to the stochastic setting and characterize the number of stochastic gradient and Hessian evaluations to reach an $(\\epsilon,\\gamma)$-SOSP.", "bibtex": "@inproceedings{NEURIPS2018_069654d5,\n author = {Mokhtari, Aryan and Ozdaglar, Asuman and Jadbabaie, Ali},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Escaping Saddle Points in Constrained Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/069654d5ce089c13f642d19f09a3d1c0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/069654d5ce089c13f642d19f09a3d1c0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/069654d5ce089c13f642d19f09a3d1c0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/069654d5ce089c13f642d19f09a3d1c0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/069654d5ce089c13f642d19f09a3d1c0-Reviews.html", "metareview": "", "pdf_size": 475161, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7241244380549648474&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "MIT; MIT; MIT", "aff_domain": "mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/069654d5ce089c13f642d19f09a3d1c0-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Estimating Learnability in the Sublinear Data Regime", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11533", "id": "11533", "author_site": "Weihao Kong, Gregory Valiant", "author": "Weihao Kong; Gregory Valiant", "abstract": "We consider the problem of estimating how well a model class is capable of fitting a distribution of labeled data. We show that it is often possible to accurately estimate this ``learnability'' even when given an amount of data that is too small to reliably learn any accurate model. Our first result applies to the setting where the data is drawn from a $d$-dimensional distribution with isotropic covariance, and the label of each datapoint is an arbitrary noisy function of the datapoint. In this setting, we show that with $O(\\sqrt{d})$ samples, one can accurately estimate the fraction of the variance of the label that can be explained via the best linear function of the data. \nWe extend these techniques to binary classification, and show that the prediction error of the best linear classifier can be accurately estimated given $O(\\sqrt{d})$ labeled samples. For comparison, in both the linear regression and binary classification settings, even if there is no noise in the labels, a sample size linear in the dimension, $d$, is required to \\emph{learn} any function correlated with the underlying model. We further extend our estimation approach to the setting where the data distribution has an (unknown) arbitrary covariance matrix, allowing these techniques to be applied to settings where the model class consists of a linear function applied to a nonlinear embedding of the data. We demonstrate the practical viability of our approaches on synthetic and real data. 
This ability to estimate the explanatory value of a set of features (or dataset), even in the regime in which there is too little data to realize that explanatory value, may be relevant to the scientific and industrial settings for which data collection is expensive and there are many potentially relevant feature sets that could be collected.", "bibtex": "@inproceedings{NEURIPS2018_8bd39eae,\n author = {Kong, Weihao and Valiant, Gregory},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Estimating Learnability in the Sublinear Data Regime},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8bd39eae38511daad6152e84545e504d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8bd39eae38511daad6152e84545e504d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8bd39eae38511daad6152e84545e504d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8bd39eae38511daad6152e84545e504d-Reviews.html", "metareview": "", "pdf_size": 708365, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8836370439359308313&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Stanford University; Stanford University", "aff_domain": "stanford.edu;cs.stanford.edu", "email": "stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8bd39eae38511daad6152e84545e504d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Estimators for Multivariate Information Measures in General Probability Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11828", "id": "11828", "author_site": "Arman Rahimzamani, Himanshu Asnani, Pramod Viswanath, Sreeram Kannan", "author": "Arman Rahimzamani; Himanshu Asnani; Pramod Viswanath; Sreeram Kannan", "abstract": "Information theoretic quantities play an important role in various settings in machine learning, including causality testing, structure inference in graphical models, time-series problems, feature selection, as well as in providing privacy guarantees. A key quantity of interest is the mutual information and generalizations thereof, including conditional mutual information, multivariate mutual information, total correlation and directed information. While the aforementioned information quantities are well defined in arbitrary probability spaces, existing estimators employ a $\\Sigma H$ method, which can only work in the purely discrete or purely continuous case since entropy (or differential entropy) is well defined only in that regime.\nIn this paper, we define a general graph divergence measure ($\\mathbb{GDM}$), generalizing the aforementioned information measures, and we construct a novel estimator via a coupling trick that directly estimates these multivariate information measures using the Radon-Nikodym derivative. 
These estimators are proven to be consistent in a general setting which includes several cases where the existing estimators fail, thus providing the only known estimators for the following settings: (1) the data has some discrete and some continuous-valued components; (2) some (or all) of the components themselves are discrete-continuous \textit{mixtures}; (3) the data is real-valued but does not have a joint density on the entire space, but rather is supported on a low-dimensional manifold. We show that our proposed estimators significantly outperform known estimators on synthetic and real datasets.", "bibtex": "@inproceedings{NEURIPS2018_c5ab6ceb,\n author = {Rahimzamani, Arman and Asnani, Himanshu and Viswanath, Pramod and Kannan, Sreeram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Estimators for Multivariate Information Measures in General Probability Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5ab6cebaca97f7171139e4d414ff5a6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab6cebaca97f7171139e4d414ff5a6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab6cebaca97f7171139e4d414ff5a6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab6cebaca97f7171139e4d414ff5a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab6cebaca97f7171139e4d414ff5a6-Reviews.html", "metareview": "", "pdf_size": 470824, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4712614051095883159&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of ECE, University of Washington; Department of ECE, University of Washington; Department of ECE, University of Illinois at Urbana-Champaign; Department of ECE, University of Washington", "aff_domain": "uw.edu;uw.edu;illinois.edu;uw.edu", "email": "uw.edu;uw.edu;illinois.edu;uw.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5ab6cebaca97f7171139e4d414ff5a6-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Washington;University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Electrical and Computer Engineering;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.washington.edu;https://illinois.edu", "aff_unique_abbr": "UW;UIUC", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Seattle;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Evidential Deep Learning to Quantify Classification Uncertainty", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11322", "id": "11322", "author_site": "Murat Sensoy, Lance Kaplan, Melih Kandemir", "author": "Murat Sensoy; Lance Kaplan; Melih Kandemir", "abstract": "Deterministic neural nets have been shown to learn effective predictors on a wide range of machine learning problems. However, as the standard approach is to train the network to minimize a prediction loss, the resultant model remains ignorant of its prediction confidence. 
Orthogonally to Bayesian neural nets that indirectly infer prediction uncertainty through weight uncertainties, we propose explicit modeling of the same using the theory of subjective logic. By placing a Dirichlet distribution on the class probabilities, we treat predictions of a neural net as subjective opinions and learn the function that collects the evidence leading to these opinions by a deterministic neural net from data. The resultant predictor for a multi-class classification problem is another Dirichlet distribution whose parameters are set by the continuous output of a neural net. We provide a preliminary analysis on how the peculiarities of our new loss function drive improved uncertainty estimation. We observe that our method achieves unprecedented success on detection of out-of-distribution queries and endurance against adversarial perturbations.", "bibtex": "@inproceedings{NEURIPS2018_a981f2b7,\n author = {Sensoy, Murat and Kaplan, Lance and Kandemir, Melih},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Evidential Deep Learning to Quantify Classification Uncertainty},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a981f2b708044d6fb4a71a1463242520-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a981f2b708044d6fb4a71a1463242520-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a981f2b708044d6fb4a71a1463242520-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a981f2b708044d6fb4a71a1463242520-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a981f2b708044d6fb4a71a1463242520-Reviews.html", "metareview": "", "pdf_size": 581001, "gs_citation": 1335, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15240614022050844112&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Ozyegin University, Turkey; US Army Research Lab, Adelphi, MD 20783, USA; Bosch Center for Artificial Intelligence, Robert-Bosch-Campus 1, 71272 Renningen, Germany", "aff_domain": "ozyegin.edu.tr;ieee.org;bosch.com", "email": "ozyegin.edu.tr;ieee.org;bosch.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a981f2b708044d6fb4a71a1463242520-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Ozyegin University;US Army Research Laboratory;Bosch Center for Artificial Intelligence", "aff_unique_dep": ";;Artificial Intelligence", "aff_unique_url": "https://www.ozyegin.edu.tr;https://www.arl.army.mil;https://www.bosch-ai.com", "aff_unique_abbr": ";ARL;BCAI", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Adelphi;Robert-Bosch-Campus", "aff_country_unique_index": "0;1;2", "aff_country_unique": "T\u00fcrkiye;United States;Germany" }, { "title": "Evolution-Guided Policy Gradient in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11137", "id": "11137", "author_site": "Shauharda Khadka, Kagan Tumer", "author": "Shauharda Khadka; Kagan Tumer", "abstract": "Deep Reinforcement Learning (DRL) algorithms have been successfully applied to a range of challenging control tasks. 
However, these methods typically suffer from three core difficulties: temporal credit assignment with sparse rewards, lack of effective exploration, and brittle convergence properties that are extremely sensitive to hyperparameters. Collectively, these challenges severely limit the applicability of these approaches to real world problems. Evolutionary Algorithms (EAs), a class of black box optimization techniques inspired by natural evolution, are well suited to address each of these three challenges. However, EAs typically suffer from high sample complexity and struggle to solve problems that require optimization of a large number of parameters. In this paper, we introduce Evolutionary Reinforcement Learning (ERL), a hybrid algorithm that leverages the population of an EA to provide diversified data to train an RL agent, and reinserts the RL agent into the EA population periodically to inject gradient information into the EA. ERL inherits EA's ability of temporal credit assignment with a fitness metric, effective exploration with a diverse set of policies, and stability of a population-based approach and complements it with off-policy DRL's ability to leverage gradients for higher sample efficiency and faster learning. Experiments in a range of challenging continuous control benchmarks demonstrate that ERL significantly outperforms prior DRL and EA methods.", "bibtex": "@inproceedings{NEURIPS2018_85fc37b1,\n author = {Khadka, Shauharda and Tumer, Kagan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Evolution-Guided Policy Gradient in Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/85fc37b18c57097425b52fc7afbb6969-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/85fc37b18c57097425b52fc7afbb6969-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/85fc37b18c57097425b52fc7afbb6969-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/85fc37b18c57097425b52fc7afbb6969-Reviews.html", "metareview": "", "pdf_size": 921414, "gs_citation": 305, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7920725821302044195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Collaborative Robotics and Intelligent Systems Institute, Oregon State University; Collaborative Robotics and Intelligent Systems Institute, Oregon State University", "aff_domain": "oregonstate.edu;oregonstate.edu", "email": "oregonstate.edu;oregonstate.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/85fc37b18c57097425b52fc7afbb6969-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Oregon State University", "aff_unique_dep": "Collaborative Robotics and Intelligent Systems Institute", "aff_unique_url": "https://oregonstate.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Evolutionary Stochastic Gradient Descent for Optimization of Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11587", "id": "11587", "author_site": "Xiaodong Cui, Wei Zhang, Zolt\u00e1n T\u00fcske, Michael Picheny", "author": "Xiaodong Cui; Wei 
Zhang; Zolt\u00e1n T\u00fcske; Michael Picheny", "abstract": "We propose a population-based Evolutionary Stochastic Gradient Descent (ESGD) framework for optimizing deep neural networks. ESGD combines SGD and gradient-free evolutionary algorithms as complementary algorithms in one framework in which the optimization alternates between the SGD step and evolution step to improve the average fitness of the population. With a back-off strategy in the SGD step and an elitist strategy in the evolution step, it guarantees that the best fitness in the population will never degrade. In addition, individuals in the population optimized with various SGD-based optimizers using distinct hyper-parameters in the SGD step are considered as competing species in a coevolution setting such that the complementarity of the optimizers is also taken into account. The effectiveness of ESGD is demonstrated across multiple applications including speech recognition, image recognition and language modeling, using networks with a variety of deep architectures.", "bibtex": "@inproceedings{NEURIPS2018_62da8c91,\n author = {Cui, Xiaodong and Zhang, Wei and T\\\"{u}ske, Zolt\\'{a}n and Picheny, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Evolutionary Stochastic Gradient Descent for Optimization of Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/62da8c91ce7b10846231921795d6059e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/62da8c91ce7b10846231921795d6059e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/62da8c91ce7b10846231921795d6059e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/62da8c91ce7b10846231921795d6059e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/62da8c91ce7b10846231921795d6059e-Reviews.html", "metareview": "", "pdf_size": 337378, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1564610246761510640&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "IBM Research AI; IBM T. J. Watson Research Center; IBM T. J. Watson Research Center; IBM T. J. Watson Research Center", "aff_domain": "us.ibm.com;us.ibm.com;ibm.com;us.ibm.com", "email": "us.ibm.com;us.ibm.com;ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/62da8c91ce7b10846231921795d6059e-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "AI", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";T. J. Watson", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Evolved Policy Gradients", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11527", "id": "11527", "author_site": "Rein Houthooft, Yuhua Chen, Phillip Isola, Bradly Stadie, Filip Wolski, OpenAI Jonathan Ho, Pieter Abbeel", "author": "Rein Houthooft; Yuhua Chen; Phillip Isola; Bradly Stadie; Filip Wolski; OpenAI Jonathan Ho; Pieter Abbeel", "abstract": "We propose a metalearning approach for learning gradient-based reinforcement learning (RL) algorithms. 
The idea is to evolve a differentiable loss function, such that an agent, which optimizes its policy to minimize this loss, will achieve high rewards. The loss is parametrized via temporal convolutions over the agent's experience. Because this loss is highly flexible in its ability to take into account the agent's history, it enables fast task learning. Empirical results show that our evolved policy gradient algorithm (EPG) achieves faster learning on several randomized environments compared to an off-the-shelf policy gradient method. We also demonstrate that EPG's learned loss can generalize to out-of-distribution test time tasks, and exhibits qualitatively different behavior from other popular metalearning algorithms.", "bibtex": "@inproceedings{NEURIPS2018_7876acb6,\n author = {Houthooft, Rein and Chen, Yuhua and Isola, Phillip and Stadie, Bradly and Wolski, Filip and Jonathan Ho, OpenAI and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Evolved Policy Gradients},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7876acb66640bad41f1e1371ef30c180-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7876acb66640bad41f1e1371ef30c180-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7876acb66640bad41f1e1371ef30c180-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7876acb66640bad41f1e1371ef30c180-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7876acb66640bad41f1e1371ef30c180-Reviews.html", "metareview": "", "pdf_size": 1779803, "gs_citation": 302, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17605986776756195620&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "OpenAI\u2217; OpenAI\u2217; OpenAI\u2217+MIT\u00d7; OpenAI\u2217+UC Berkeley\u2020; OpenAI\u2217; OpenAI\u2217+UC Berkeley\u2020; UC Berkeley\u2020", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7876acb66640bad41f1e1371ef30c180-Abstract.html", "aff_unique_index": "0;0;0+1;0+2;0;0+2;2", "aff_unique_norm": "OpenAI;Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://openai.com;https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "OpenAI;MIT;UC Berkeley", "aff_campus_unique_index": ";1;1;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0+0;0+0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Ex ante coordination and collusion in zero-sum multi-player extensive-form games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11916", "id": "11916", "author_site": "Gabriele Farina, Andrea Celli, Nicola Gatti, Tuomas Sandholm", "author": "Gabriele Farina; Andrea Celli; Nicola Gatti; Tuomas Sandholm", "abstract": "Recent milestones in equilibrium computation, such as the success of Libratus, show that it is possible to compute strong solutions to two-player zero-sum games in theory and practice. This is not the case for games with more than two players, which remain one of the main open challenges in computational game theory. 
This paper focuses on zero-sum games where a team of players faces an opponent, as is the case, for example, in Bridge, collusion in poker, and many non-recreational applications such as war, where the colluders do not have time or means of communicating during battle, collusion in bidding, where communication during the auction is illegal, and coordinated swindling in public. The possibility for the team members to communicate before game play\u2014that is, coordinate their strategies ex ante\u2014makes the use of behavioral strategies unsatisfactory. The reasons for this are closely related to the fact that the team can be represented as a single player with imperfect recall. We propose a new game representation, the realization form, that generalizes the sequence form but can also be applied to imperfect-recall games. Then, we use it to derive an auxiliary game that is equivalent to the original one. It provides a sound way to map the problem of finding an optimal ex-ante-correlated strategy for the team to the well-understood Nash equilibrium-finding problem in a (larger) two-player zero-sum perfect-recall game. By reasoning over the auxiliary game, we devise an anytime algorithm, fictitious team-play, that is guaranteed to converge to an optimal coordinated strategy for the team against an optimal opponent, and that is dramatically faster than the prior state-of-the-art algorithm for this problem.", "bibtex": "@inproceedings{NEURIPS2018_c17028c9,\n author = {Farina, Gabriele and Celli, Andrea and Gatti, Nicola and Sandholm, Tuomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Ex ante coordination and collusion in zero-sum multi-player extensive-form games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c17028c9b6e0c5deaad29665d582284a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c17028c9b6e0c5deaad29665d582284a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c17028c9b6e0c5deaad29665d582284a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c17028c9b6e0c5deaad29665d582284a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c17028c9b6e0c5deaad29665d582284a-Reviews.html", "metareview": "", "pdf_size": 439659, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17815706749410563361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Computer Science Department, Carnegie Mellon University; DEIB, Politecnico di Milano; DEIB, Politecnico di Milano; Computer Science Department, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;polimi.it;polimi.it;cs.cmu.edu", "email": "cs.cmu.edu;polimi.it;polimi.it;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c17028c9b6e0c5deaad29665d582284a-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Carnegie Mellon University;Politecnico di Milano", "aff_unique_dep": "Computer Science Department;DEIB", "aff_unique_url": "https://www.cmu.edu;https://www.polimi.it", "aff_unique_abbr": "CMU;Politecnico di Milano", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Italy" }, { "title": "Exact natural 
gradient in deep linear networks and its application to the nonlinear case", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11577", "id": "11577", "author_site": "Alberto Bernacchia, Mate Lengyel, Guillaume Hennequin", "author": "Alberto Bernacchia; Mate Lengyel; Guillaume Hennequin", "abstract": "Stochastic gradient descent (SGD) remains the method of choice for deep learning, despite the limitations arising for ill-behaved objective functions. In cases where it could be estimated, the natural gradient has proven very effective at mitigating the catastrophic effects of pathological curvature in the objective function, but little is known theoretically about its convergence properties, and it has yet to find a practical implementation that would scale to very deep and large networks. Here, we derive an exact expression for the natural gradient in deep linear networks, which exhibit pathological curvature similar to the nonlinear case. We provide for the first time an analytical solution for its convergence rate, showing that the loss decreases exponentially to the global minimum in parameter space. Our expression for the natural gradient is surprisingly simple, computationally tractable, and explains why some approximations proposed previously work well in practice. This opens new avenues for approximating the natural gradient in the nonlinear case, and we show in preliminary experiments that our online natural gradient descent outperforms SGD on MNIST autoencoding while sharing its computational simplicity.", "bibtex": "@inproceedings{NEURIPS2018_7f018eb7,\n author = {Bernacchia, Alberto and Lengyel, Mate and Hennequin, Guillaume},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exact natural gradient in deep linear networks and its application to the nonlinear case},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7f018eb7b301a66658931cb8a93fd6e8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7f018eb7b301a66658931cb8a93fd6e8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7f018eb7b301a66658931cb8a93fd6e8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7f018eb7b301a66658931cb8a93fd6e8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7f018eb7b301a66658931cb8a93fd6e8-Reviews.html", "metareview": "", "pdf_size": 395086, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14287307043837167218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Engineering, University of Cambridge, Cambridge, UK, CB2 1PZ; Department of Engineering, University of Cambridge, Cambridge CB2 1PZ, UK+Department of Cognitive Science, Central European University, Budapest H-1051, Hungary; Department of Engineering, University of Cambridge, Cambridge, UK, CB2 1PZ", "aff_domain": "cam.ac.uk;eng.cam.ac.uk;eng.cam.ac.uk", "email": "cam.ac.uk;eng.cam.ac.uk;eng.cam.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7f018eb7b301a66658931cb8a93fd6e8-Abstract.html", "aff_unique_index": "0;0+1;0", "aff_unique_norm": "University of Cambridge;Central European University", "aff_unique_dep": "Department of Engineering;Department of Cognitive Science", "aff_unique_url": "https://www.cam.ac.uk;https://www.ceu.edu", "aff_unique_abbr": "Cambridge;CEU", "aff_campus_unique_index": "0;0+1;0", "aff_campus_unique": "Cambridge;Budapest", "aff_country_unique_index": "0;0+1;0", "aff_country_unique": "United Kingdom;Hungary" }, { "title": "Expanding Holographic Embeddings for Knowledge Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11443", "id": "11443", "author_site": "Yexiang Xue, Yang Yuan, Zhitian Xu, Ashish Sabharwal", "author": "Yexiang Xue; Yang Yuan; Zhitian Xu; Ashish Sabharwal", "abstract": "Neural models operating over structured spaces such as knowledge graphs require a continuous embedding of the discrete elements of this space (such as entities) as well as the relationships between them. Relational embeddings with high expressivity, however, have high model complexity, making them computationally difficult to train. We propose a new family of embeddings for knowledge graphs that interpolate between a method with high model complexity and one, namely Holographic embeddings (HolE), with low dimensionality and high training efficiency. This interpolation, termed HolEx, is achieved by concatenating several linearly perturbed copies of original HolE. We formally characterize the number of perturbed copies needed to provably recover the full entity-entity or entity-relation interaction matrix, leveraging ideas from Haar wavelets and compressed sensing. In practice, using just a handful of Haar-based or random perturbation vectors results in a much stronger knowledge completion system. 
On the Freebase FB15K dataset, HolEx outperforms the originally reported HolE by 14.7\\% on the HITS@10 metric, and the current path-based state-of-the-art method, PTransE, by 4\\% (absolute).", "bibtex": "@inproceedings{NEURIPS2018_dd28e506,\n author = {Xue, Yexiang and Yuan, Yang and Xu, Zhitian and Sabharwal, Ashish},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Expanding Holographic Embeddings for Knowledge Completion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dd28e50635038e9cf3a648c2dd17ad0a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dd28e50635038e9cf3a648c2dd17ad0a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dd28e50635038e9cf3a648c2dd17ad0a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dd28e50635038e9cf3a648c2dd17ad0a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dd28e50635038e9cf3a648c2dd17ad0a-Reviews.html", "metareview": "", "pdf_size": 426820, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12857926112554194163&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dd28e50635038e9cf3a648c2dd17ad0a-Abstract.html" }, { "title": "Experimental Design for Cost-Aware Learning of Causal Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11516", "id": "11516", "author_site": "Erik Lindgren, Murat Kocaoglu, Alex Dimakis, Sriram Vishwanath", "author": "Erik Lindgren; Murat Kocaoglu; Alexandros G Dimakis; Sriram Vishwanath", "abstract": "We consider the minimum cost intervention design problem: Given the essential graph of a causal graph and a cost to intervene on a variable, identify the set of interventions with minimum total cost that can learn any causal graph with the given essential graph. We first show that this problem is NP-hard. We then prove that we can achieve a constant factor approximation to this problem with a greedy algorithm. We then constrain the sparsity of each intervention. We develop an algorithm that returns an intervention design that is nearly optimal in terms of size for sparse graphs with sparse interventions and we discuss how to use it when there are costs on the vertices.", "bibtex": "@inproceedings{NEURIPS2018_ba3e9b6a,\n author = {Lindgren, Erik and Kocaoglu, Murat and Dimakis, Alexandros G and Vishwanath, Sriram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Experimental Design for Cost-Aware Learning of Causal Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ba3e9b6a519cfddc560b5d53210df1bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ba3e9b6a519cfddc560b5d53210df1bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ba3e9b6a519cfddc560b5d53210df1bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ba3e9b6a519cfddc560b5d53210df1bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ba3e9b6a519cfddc560b5d53210df1bd-Reviews.html", "metareview": "", "pdf_size": 384285, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6352940271590219890&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Texas at Austin; MIT-IBM Watson AI Lab; University of Texas at Austin; University of Texas at Austin", "aff_domain": "utexas.edu;ibm.com;austin.utexas.edu;ece.utexas.edu", "email": "utexas.edu;ibm.com;austin.utexas.edu;ece.utexas.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ba3e9b6a519cfddc560b5d53210df1bd-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Texas at Austin;Massachusetts Institute of Technology", "aff_unique_dep": ";IBM Watson AI Lab", "aff_unique_url": "https://www.utexas.edu;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "UT Austin;MIT-IBM AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Explaining Deep Learning Models -- A Bayesian Non-parametric Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11445", "id": "11445", "author_site": "Wenbo Guo, Sui Huang, Yunzhe Tao, Xinyu Xing, Lin Lin", "author": "Wenbo Guo; Sui Huang; Yunzhe Tao; Xinyu Xing; Lin Lin", "abstract": "Understanding and interpreting how machine learning (ML) models make decisions have been a big challenge. While recent research has proposed various technical approaches to provide some clues as to how an ML model makes individual predictions, they cannot provide users with an ability to inspect a model as a complete entity. In this work, we propose a novel technical approach that augments a Bayesian non-parametric regression mixture model with multiple elastic nets. Using the enhanced mixture model, we can extract generalizable insights for a target model through a global approximation. To demonstrate the utility of our approach, we evaluate it on different ML models in the context of image recognition. The empirical results indicate that our proposed approach not only outperforms the state-of-the-art techniques in explaining individual decisions but also provides users with an ability to discover the vulnerabilities of the target ML models.", "bibtex": "@inproceedings{NEURIPS2018_4b4edc26,\n author = {Guo, Wenbo and Huang, Sui and Tao, Yunzhe and Xing, Xinyu and Lin, Lin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Explaining Deep Learning Models -- A Bayesian Non-parametric Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4b4edc2630fe75800ddc29a7b4070add-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4b4edc2630fe75800ddc29a7b4070add-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4b4edc2630fe75800ddc29a7b4070add-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4b4edc2630fe75800ddc29a7b4070add-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4b4edc2630fe75800ddc29a7b4070add-Reviews.html", "metareview": "", "pdf_size": 2064216, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7783234340966171458&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "The Pennsylvania State University; Netflix Inc.; Columbia University; The Pennsylvania State University; The Pennsylvania State University", "aff_domain": "ist.psu.edu;netflix.com;columbia.edu;ist.psu.edu;psu.edu", "email": "ist.psu.edu;netflix.com;columbia.edu;ist.psu.edu;psu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4b4edc2630fe75800ddc29a7b4070add-Abstract.html", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Pennsylvania State University;Netflix;Columbia University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://www.netflix.com;https://www.columbia.edu", "aff_unique_abbr": "PSU;Netflix;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Explanations based on the Missing: Towards Contrastive Explanations with Pertinent Negatives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11082", "id": "11082", "author_site": "Amit Dhurandhar, Pin-Yu Chen, Ronny Luss, Chun-Chen Tu, Paishun Ting, Karthikeyan Shanmugam, Payel Das", "author": "Amit Dhurandhar; Pin-Yu Chen; Ronny Luss; Chun-Chen Tu; Paishun Ting; Karthikeyan Shanmugam; Payel Das", "abstract": "In this paper we propose a novel method that provides contrastive explanations justifying the classification of an input by a black box classifier such as a deep neural network. Given an input we find what should be minimally and sufficiently present (viz. important object pixels in an image) to justify its classification and analogously what should be minimally and necessarily \\emph{absent} (viz. certain background pixels). We argue that such explanations are natural for humans and are used commonly in domains such as health care and criminology. What is minimally but critically \\emph{absent} is an important part of an explanation, which to the best of our knowledge, has not been explicitly identified by current explanation methods that explain predictions of neural networks. We validate our approach on three real datasets obtained from diverse domains; namely, a handwritten digits dataset MNIST, a large procurement fraud dataset and a brain activity strength dataset. 
In all three cases, we witness the power of our approach in generating precise explanations that are also easy for human experts to understand and evaluate.", "bibtex": "@inproceedings{NEURIPS2018_c5ff2543,\n author = {Dhurandhar, Amit and Chen, Pin-Yu and Luss, Ronny and Tu, Chun-Chen and Ting, Paishun and Shanmugam, Karthikeyan and Das, Payel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Explanations based on the Missing: Towards Contrastive Explanations with Pertinent Negatives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5ff2543b53f4cc0ad3819a36752467b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5ff2543b53f4cc0ad3819a36752467b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c5ff2543b53f4cc0ad3819a36752467b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5ff2543b53f4cc0ad3819a36752467b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5ff2543b53f4cc0ad3819a36752467b-Reviews.html", "metareview": "", "pdf_size": 591292, "gs_citation": 805, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14566322531022731329&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "IBM Research; IBM Research; IBM Research; University of Michigan; University of Michigan; IBM Research; IBM Research", "aff_domain": "us.ibm.com;ibm.com;us.ibm.com;umich.edu;umich.edu;ibm.com;us.ibm.com", "email": "us.ibm.com;ibm.com;us.ibm.com;umich.edu;umich.edu;ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5ff2543b53f4cc0ad3819a36752467b-Abstract.html", "aff_unique_index": "0;0;0;1;1;0;0", "aff_unique_norm": "IBM;University of Michigan", "aff_unique_dep": "IBM Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.umich.edu", "aff_unique_abbr": "IBM;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploiting Numerical Sparsity for Efficient Learning : Faster Eigenvector Computation and Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11515", "id": "11515", "author_site": "Neha Gupta, Aaron Sidford", "author": "Neha Gupta; Aaron Sidford", "abstract": "In this paper, we obtain improved running times for regression and top eigenvector computation for numerically sparse matrices. Given a data matrix $\\mat{A} \\in \\R^{n \\times d}$ where every row $a \\in \\R^d$ has $\\|a\\|_2^2 \\leq L$ and numerical sparsity $\\leq s$, i.e. $\\|a\\|_1^2 / \\|a\\|_2^2 \\leq s$, we provide faster algorithms for these problems for many parameter settings.\n\nFor top eigenvector computation, when $\\gap > 0$ is the relative gap between the top two eigenvectors of $\\mat{A}^\\top \\mat{A}$ and $r$ is the stable rank of $\\mat{A}$ we obtain a running time of $\\otilde(nd + r(s + \\sqrt{r s}) / \\gap^2)$ improving upon the previous best unaccelerated running time of $O(nd + r d / \\gap^2)$. 
As $r \\leq d$ and $s \\leq d$, our algorithm improves upon or matches the previous bounds in all parameter settings.\n\nFor regression, when $\\mu > 0$ is the smallest eigenvalue of $\\mat{A}^\\top \\mat{A}$ we obtain a running time of $\\otilde(nd + (nL / \\mu) \\sqrt{s nL / \\mu})$ improving upon the previous best unaccelerated running time of $\\otilde(nd + n L d / \\mu)$. This result expands the regime in which regression can be solved in nearly linear time, from $L/\\mu = \\otilde(1)$ to $L / \\mu = \\otilde(d^{2/3} / (sn)^{1/3})$.\n\nFurthermore, we obtain similar improvements even when row norms and numerical sparsities are non-uniform and we show how to achieve even faster running times by accelerating using approximate proximal point \\cite{frostig2015regularizing} / catalyst \\cite{lin2015universal}. Our running times depend only on the size of the input and natural numerical measures of the matrix, i.e. eigenvalues and $\\ell_p$ norms, making progress on a key open problem regarding optimal running times for efficient large-scale learning.", "bibtex": "@inproceedings{NEURIPS2018_4a1590df,\n author = {Gupta, Neha and Sidford, Aaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exploiting Numerical Sparsity for Efficient Learning : Faster Eigenvector Computation and Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4a1590df1d5968d41b855005bb8b67bf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4a1590df1d5968d41b855005bb8b67bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4a1590df1d5968d41b855005bb8b67bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4a1590df1d5968d41b855005bb8b67bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4a1590df1d5968d41b855005bb8b67bf-Reviews.html", "metareview": "", "pdf_size": 414425, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10754584829474087028&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, Stanford University; Department of Management Science and Engineering, Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu", "email": "cs.stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4a1590df1d5968d41b855005bb8b67bf-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Exploration in Structured Reinforcement Learning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11847", "id": "11847", "author_site": "Jungseul Ok, Alexandre Proutiere, Damianos Tranos", "author": "Jungseul Ok; Alexandre Proutiere; Damianos Tranos", "abstract": "We address reinforcement learning problems with finite state and action spaces where the underlying MDP has some known structure that could be potentially exploited to minimize the exploration rates of suboptimal (state, action) pairs. 
For any arbitrary structure, we derive problem-specific regret lower bounds satisfied by any learning algorithm. These lower bounds are made explicit for unstructured MDPs and for those whose transition probabilities and average reward functions are Lipschitz continuous w.r.t. the state and action. For Lipschitz MDPs, the bounds are shown not to scale with the sizes S and A of the state and action spaces, i.e., they are smaller than c log T where T is the time horizon and the constant c only depends on the Lipschitz structure, the span of the bias function, and the minimal action sub-optimality gap. This contrasts with unstructured MDPs where the regret lower bound typically scales as SA log T. We devise DEL (Directed Exploration Learning), an algorithm that matches our regret lower bounds. We further simplify the algorithm for Lipschitz MDPs, and show that the simplified version is still able to efficiently exploit the structure.", "bibtex": "@inproceedings{NEURIPS2018_d693d554,\n author = {Ok, Jungseul and Proutiere, Alexandre and Tranos, Damianos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exploration in Structured Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d693d554e0ede0d75f7d2873b015f228-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d693d554e0ede0d75f7d2873b015f228-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d693d554e0ede0d75f7d2873b015f228-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d693d554e0ede0d75f7d2873b015f228-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d693d554e0ede0d75f7d2873b015f228-Reviews.html", "metareview": "", "pdf_size": 333553, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12447196536199850381&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "KTH, EECS; KTH, EECS; KTH, EECS", "aff_domain": "illinois.edu;kth.se;kth.se", "email": "illinois.edu;kth.se;kth.se", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d693d554e0ede0d75f7d2873b015f228-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "School of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Sweden" }, { "title": "Exponentially Weighted Imitation Learning for Batched Historical Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11609", "id": "11609", "author_site": "Qing Wang, Jiechao Xiong, Lei Han, peng sun, Han Liu, Tong Zhang", "author": "Qing Wang; Jiechao Xiong; Lei Han; peng sun; Han Liu; Tong Zhang", "abstract": "We consider deep policy learning with only batched historical trajectories. The main challenge of this problem is that the learner no longer has a simulator or ``environment oracle'' as in most reinforcement learning settings. 
To solve this problem, we propose a monotonic advantage reweighted imitation learning strategy that is applicable to problems with complex nonlinear function approximation and works well with hybrid (discrete and continuous) action spaces. The method does not rely on the knowledge of the behavior policy and thus can be used to learn from data generated by an unknown policy. Under mild conditions, our algorithm, though surprisingly simple, has a policy improvement bound and outperforms most competing methods empirically. Thorough numerical results are also provided to demonstrate the efficacy of the proposed methodology.", "bibtex": "@inproceedings{NEURIPS2018_4aec1b34,\n author = {Wang, Qing and Xiong, Jiechao and Han, Lei and sun, peng and Liu, Han and Zhang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exponentially Weighted Imitation Learning for Batched Historical Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4aec1b3435c52abbdf8334ea0e7141e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4aec1b3435c52abbdf8334ea0e7141e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4aec1b3435c52abbdf8334ea0e7141e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4aec1b3435c52abbdf8334ea0e7141e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4aec1b3435c52abbdf8334ea0e7141e0-Reviews.html", "metareview": "", "pdf_size": 462566, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3758340759127048753&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Tencent AI Lab; Tencent AI Lab; Tencent AI Lab; Tencent AI Lab; Tencent AI Lab + Northwestern University; Tencent AI Lab", "aff_domain": "tencent.com;tencent.com;tencent.com;tencent.com;northwestern.edu;tongzhang-ml.org", "email": "tencent.com;tencent.com;tencent.com;tencent.com;northwestern.edu;tongzhang-ml.org", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4aec1b3435c52abbdf8334ea0e7141e0-Abstract.html", "aff_unique_index": "0;0;0;0;0+1;0", "aff_unique_norm": "Tencent;Northwestern University", "aff_unique_dep": "Tencent AI Lab;", "aff_unique_url": "https://ai.tencent.com;https://www.northwestern.edu", "aff_unique_abbr": "Tencent AI Lab;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+1;0", "aff_country_unique": "China;United States" }, { "title": "Exponentiated Strongly Rayleigh Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11440", "id": "11440", "author_site": "Zelda Mariet, Suvrit Sra, Stefanie Jegelka", "author": "Zelda E. Mariet; Suvrit Sra; Stefanie Jegelka", "abstract": "Strongly Rayleigh (SR) measures are discrete probability distributions over the subsets of a ground set. They enjoy strong negative dependence properties, as a result of which they assign higher probability to subsets of diverse elements. We introduce in this paper Exponentiated Strongly Rayleigh (ESR) measures, which sharpen (or smoothen) the negative dependence property of SR measures via a single parameter (the exponent) that can be intuitively understood as an inverse temperature. 
We develop efficient MCMC procedures for approximate sampling from ESRs, and obtain explicit mixing time bounds for two concrete instances: exponentiated versions of Determinantal Point Processes and Dual Volume Sampling. We illustrate some of the potential of ESRs, by applying them to a few machine learning tasks; empirical results confirm that beyond their theoretical appeal, ESR-based models hold significant promise for these tasks.", "bibtex": "@inproceedings{NEURIPS2018_1c6a0198,\n author = {Mariet, Zelda E. and Sra, Suvrit and Jegelka, Stefanie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exponentiated Strongly Rayleigh Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1c6a0198177bfcc9bd93f6aab94aad3c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1c6a0198177bfcc9bd93f6aab94aad3c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1c6a0198177bfcc9bd93f6aab94aad3c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1c6a0198177bfcc9bd93f6aab94aad3c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1c6a0198177bfcc9bd93f6aab94aad3c-Reviews.html", "metareview": "", "pdf_size": 529134, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9649845538199918705&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Massachusetts Institute of Technology; Massachusetts Institute of Technology; Massachusetts Institute of Technology", "aff_domain": "csail.mit.edu;mit.edu;csail.mit.edu", "email": "csail.mit.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1c6a0198177bfcc9bd93f6aab94aad3c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Extracting Relationships by Multi-Domain Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11656", "id": "11656", "author_site": "Yitong Li, michael Murias, geraldine Dawson, David Carlson", "author": "Yitong Li; michael Murias; geraldine Dawson; David E Carlson", "abstract": "In many biological and medical contexts, we construct a large labeled corpus by aggregating many sources to use in target prediction tasks. Unfortunately, many of the sources may be irrelevant to our target task, so ignoring the structure of the dataset is detrimental. This work proposes a novel approach, the Multiple Domain Matching Network (MDMN), to exploit this structure. MDMN embeds all data into a shared feature space while learning which domains share strong statistical relationships. These relationships are often insightful in their own right, and they allow domains to share strength without interference from irrelevant data. This methodology builds on existing distribution-matching approaches by assuming that source domains are varied and outcomes multi-factorial. Therefore, each domain should only match a relevant subset. 
Theoretical analysis shows that the proposed approach can have a tighter generalization bound than existing multiple-domain adaptation approaches. Empirically, we show that the proposed methodology handles higher numbers of source domains (up to 21 empirically), and provides state-of-the-art performance on image, text, and multi-channel time series classification, including clinically relevant data of a novel treatment of Autism Spectrum Disorder.", "bibtex": "@inproceedings{NEURIPS2018_2fd0fd3e,\n author = {Li, Yitong and Murias, michael and Dawson, geraldine and Carlson, David E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Extracting Relationships by Multi-Domain Matching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2fd0fd3efa7c4cfb034317b21f3c2d93-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2fd0fd3efa7c4cfb034317b21f3c2d93-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2fd0fd3efa7c4cfb034317b21f3c2d93-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2fd0fd3efa7c4cfb034317b21f3c2d93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2fd0fd3efa7c4cfb034317b21f3c2d93-Reviews.html", "metareview": "", "pdf_size": 1408096, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10533202500367250207&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2fd0fd3efa7c4cfb034317b21f3c2d93-Abstract.html" }, { "title": "FD-GAN: Pose-guided Feature Distilling GAN for Robust Person Re-identification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11140", "id": "11140", "author_site": "Yixiao Ge, Zhuowan Li, Haiyu Zhao, Guojun Yin, Shuai Yi, Xiaogang Wang, Hongsheng Li", "author": "Yixiao Ge; Zhuowan Li; Haiyu Zhao; Guojun Yin; Shuai Yi; Xiaogang Wang; hongsheng Li", "abstract": "Person re-identification (reID) is an important task that requires to retrieve a person's images from an image dataset, given one image of the person of interest. For learning robust person features, the pose variation of person images is one of the key challenges. Existing works targeting the problem either perform human alignment, or learn human-region-based representations. Extra pose information and computational cost is generally required for inference. To solve this issue, a Feature Distilling Generative Adversarial Network (FD-GAN) is proposed for learning identity-related and pose-unrelated representations. It is a novel framework based on a Siamese structure with multiple novel discriminators on human poses and identities. In addition to the discriminators, a novel same-pose loss is also integrated, which requires appearance of a same person's generated images to be similar. After learning pose-unrelated person features with pose guidance, no auxiliary pose information and additional computational cost is required during testing. 
Our proposed FD-GAN achieves state-of-the-art performance on three person reID datasets, which demonstrates the effectiveness and robust feature-distilling capability of the proposed FD-GAN.", "bibtex": "@inproceedings{NEURIPS2018_c5ab0bc6,\n author = {Ge, Yixiao and Li, Zhuowan and Zhao, Haiyu and Yin, Guojun and Yi, Shuai and Wang, Xiaogang and Li, hongsheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {FD-GAN: Pose-guided Feature Distilling GAN for Robust Person Re-identification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5ab0bc60ac7929182aadd08703f1ec6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab0bc60ac7929182aadd08703f1ec6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab0bc60ac7929182aadd08703f1ec6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5ab0bc60ac7929182aadd08703f1ec6-Reviews.html", "metareview": "", "pdf_size": 2266424, "gs_citation": 448, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8848217033553196180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "CUHK-SenseTime Joint Laboratory, The Chinese University of Hong Kong; SenseTime Research+Johns Hopkins University; SenseTime Research; University of Science and Technology of China+SenseTime Research; SenseTime Research; CUHK-SenseTime Joint Laboratory, The Chinese University of Hong Kong; CUHK-SenseTime Joint Laboratory, The Chinese University of Hong Kong", "aff_domain": "link.cuhk.edu.hk;jhu.edu;sensetime.com;mail.ustc.edu.cn;sensetime.com;ee.cuhk.edu.hk;ee.cuhk.edu.hk", "email": "link.cuhk.edu.hk;jhu.edu;sensetime.com;mail.ustc.edu.cn;sensetime.com;ee.cuhk.edu.hk;ee.cuhk.edu.hk", "github": "https://github.com/yxgeee/FD-GAN", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5ab0bc60ac7929182aadd08703f1ec6-Abstract.html", "aff_unique_index": "0;1+2;1;3+1;1;0;0", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime;Johns Hopkins University;University of Science and Technology of China", "aff_unique_dep": "CUHK-SenseTime Joint Laboratory;SenseTime Research;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sensetime.com;https://www.jhu.edu;http://www.ustc.edu.cn", "aff_unique_abbr": "CUHK;SenseTime;JHU;USTC", "aff_campus_unique_index": "0;;;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0+1;0;0+0;0;0;0", "aff_country_unique": "China;United States" }, { "title": "FRAGE: Frequency-Agnostic Word Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11150", "id": "11150", "author_site": "Chengyue Gong, Di He, Xu Tan, Tao Qin, Liwei Wang, Tie-Yan Liu", "author": "Chengyue Gong; Di He; Xu Tan; Tao Qin; Liwei Wang; Tie-Yan Liu", "abstract": "Continuous word representation (aka word embedding) is a basic building block in many neural network-based models used in natural language processing tasks. 
Although it is widely accepted that words with similar semantics should be close to each other in the embedding space, we find that word embeddings learned in several tasks are biased towards word frequency: the embeddings of high-frequency and low-frequency words lie in different subregions of the embedding space, and the embedding of a rare word and a popular word can be far from each other even if they are semantically similar. This makes learned word embeddings ineffective, especially for rare words, and consequently limits the performance of these neural network models. In order to mitigate the issue, in this paper, we propose a neat, simple yet effective adversarial training method to blur the boundary between the embeddings of high-frequency words and low-frequency words. We conducted comprehensive studies on ten datasets across four natural language processing tasks, including word similarity, language modeling, machine translation and text classification. Results show that we achieve higher performance than the baselines in all tasks.", "bibtex": "@inproceedings{NEURIPS2018_e555ebe0,\n author = {Gong, Chengyue and He, Di and Tan, Xu and Qin, Tao and Wang, Liwei and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {FRAGE: Frequency-Agnostic Word Representation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e555ebe0ce426f7f9b2bef0706315e0c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e555ebe0ce426f7f9b2bef0706315e0c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e555ebe0ce426f7f9b2bef0706315e0c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e555ebe0ce426f7f9b2bef0706315e0c-Reviews.html", "metareview": "", "pdf_size": 1069454, "gs_citation": 185, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=899516517229807927&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Peking University; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University; Microsoft Research Asia; Microsoft Research Asia; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University + Center for Data Science, Peking University, Beijing Institute of Big Data Research; Microsoft Research Asia", "aff_domain": "pku.edu.cn;pku.edu.cn;microsoft.com;microsoft.com;cis.pku.edu.cn;microsoft.com", "email": "pku.edu.cn;pku.edu.cn;microsoft.com;microsoft.com;cis.pku.edu.cn;microsoft.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e555ebe0ce426f7f9b2bef0706315e0c-Abstract.html", "aff_unique_index": "0;0;1;1;0+0;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;MSR Asia", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Asia;Beijing", "aff_country_unique_index": "0;0;0;0;0+0;0", "aff_country_unique": "China" }, { "title": "Factored Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11290", "id": "11290", "author_site": "Julian Zimmert, Yevgeny Seldin", "author": "Julian Zimmert; Yevgeny Seldin", "abstract": "We introduce the factored bandits model, 
which is a framework for learning with limited (bandit) feedback, where actions can be decomposed into a Cartesian product of atomic actions. Factored bandits incorporate rank-1 bandits as a special case, but significantly relax the assumptions on the form of the reward function. We provide an anytime algorithm for stochastic factored bandits and upper and lower regret bounds for the problem that match up to constant factors. Furthermore, we show that with a slight modification the proposed algorithm can be applied to utility-based dueling bandits. We obtain an improvement in the additive terms of the regret bound compared to state-of-the-art algorithms (the additive terms dominate up to time horizons that are exponential in the number of arms).", "bibtex": "@inproceedings{NEURIPS2018_226d1f15,\n author = {Zimmert, Julian and Seldin, Yevgeny},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Factored Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/226d1f15ecd35f784d2a20c3ecf56d7f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/226d1f15ecd35f784d2a20c3ecf56d7f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/226d1f15ecd35f784d2a20c3ecf56d7f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/226d1f15ecd35f784d2a20c3ecf56d7f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/226d1f15ecd35f784d2a20c3ecf56d7f-Reviews.html", "metareview": "", "pdf_size": 505579, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14558746788016206815&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Copenhagen; University of Copenhagen", "aff_domain": "di.ku.dk;di.ku.dk", "email": "di.ku.dk;di.ku.dk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/226d1f15ecd35f784d2a20c3ecf56d7f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Fairness Behind a Veil of Ignorance: A Welfare Analysis for Automated Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11144", "id": "11144", "author_site": "Hoda Heidari, Claudio Ferrari, Krishna Gummadi, Andreas Krause", "author": "Hoda Heidari; Claudio Ferrari; Krishna Gummadi; Andreas Krause", "abstract": "We draw attention to an important, yet largely overlooked aspect of evaluating fairness for automated decision making systems---namely risk and welfare considerations. Our proposed family of measures corresponds to the long-established formulations of cardinal social welfare in economics, and is justified by the Rawlsian conception of fairness behind a veil of ignorance. The convex formulation of our welfare-based measures of fairness allows us to integrate them as a constraint into any convex loss minimization pipeline. 
Our empirical analysis reveals interesting trade-offs between our proposal and (a) prediction accuracy, (b) group discrimination, and (c) Dwork et al.'s notion of individual fairness. Furthermore, and perhaps most importantly, our work provides both heuristic justification and empirical evidence suggesting that a lower bound on our measures often leads to bounded inequality in algorithmic outcomes; hence presenting the first computationally feasible mechanism for bounding individual-level inequality.", "bibtex": "@inproceedings{NEURIPS2018_be3159ad,\n author = {Heidari, Hoda and Ferrari, Claudio and Gummadi, Krishna and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fairness Behind a Veil of Ignorance: A Welfare Analysis for Automated Decision Making},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/be3159ad04564bfb90db9e32851ebf9c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/be3159ad04564bfb90db9e32851ebf9c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/be3159ad04564bfb90db9e32851ebf9c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/be3159ad04564bfb90db9e32851ebf9c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/be3159ad04564bfb90db9e32851ebf9c-Reviews.html", "metareview": "", "pdf_size": 1681731, "gs_citation": 156, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5445996458600650892&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "ETH Z\u00fcrich; ETH Z\u00fcrich; MPI-SWS; ETH Z\u00fcrich", "aff_domain": "inf.ethz.ch;ethz.ch;mpi-sws.org;ethz.ch", "email": "inf.ethz.ch;ethz.ch;mpi-sws.org;ethz.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/be3159ad04564bfb90db9e32851ebf9c-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ETH Zurich;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.mpi-sws.org", "aff_unique_abbr": "ETHZ;MPI-SWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "title": "Fairness Through Computationally-Bounded Awareness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11475", "id": "11475", "author_site": "Michael Kim, Omer Reingold, Guy Rothblum", "author": "Michael Kim; Omer Reingold; Guy Rothblum", "abstract": "We study the problem of fair classification within the versatile framework of Dwork et al. [ITCS '12], which assumes the existence of a metric that measures similarity between pairs of individuals. Unlike earlier work, we do not assume that the entire metric is known to the learning algorithm; instead, the learner can query this metric a bounded number of times.", "bibtex": "@inproceedings{NEURIPS2018_c8dfece5,\n author = {Kim, Michael and Reingold, Omer and Rothblum, Guy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fairness Through Computationally-Bounded Awareness},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c8dfece5cc68249206e4690fc4737a8d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c8dfece5cc68249206e4690fc4737a8d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c8dfece5cc68249206e4690fc4737a8d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c8dfece5cc68249206e4690fc4737a8d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c8dfece5cc68249206e4690fc4737a8d-Reviews.html", "metareview": "", "pdf_size": 462438, "gs_citation": 191, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3455632107988491216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Stanford University; Stanford University; Weizmann Institute of Science", "aff_domain": "cs.stanford.edu;stanford.edu;alum.mit.edu", "email": "cs.stanford.edu;stanford.edu;alum.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c8dfece5cc68249206e4690fc4737a8d-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Weizmann Institute of Science", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.weizmann.org.il", "aff_unique_abbr": "Stanford;Weizmann", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Faithful Inversion of Generative Models for Effective Amortized Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11312", "id": "11312", "author_site": "Stefan Webb, Adam Golinski, Rob Zinkov, Siddharth N, Thomas Rainforth, Yee Whye Teh, Frank Wood", "author": "Stefan Webb; Adam Golinski; Rob Zinkov; Siddharth N; Tom Rainforth; Yee Whye Teh; Frank Wood", "abstract": "Inference amortization methods share information across multiple posterior-inference problems, allowing each to be carried out more efficiently. Generally, they require the inversion of the dependency structure in the generative model, as the modeller must learn a mapping from observations to distributions approximating the posterior. Previous approaches have involved inverting the dependency structure in a heuristic way that fails to capture these dependencies correctly, thereby limiting the achievable accuracy of the resulting approximations. We introduce an algorithm for faithfully, and minimally, inverting the graphical model structure of any generative model. Such inverses have two crucial properties: (a) they do not encode any independence assertions that are absent from the model; and (b) they are local maxima for the number of true independencies encoded. We prove the correctness of our approach and empirically show that the resulting minimally faithful inverses lead to better inference amortization than existing heuristic approaches.", "bibtex": "@inproceedings{NEURIPS2018_894b77f8,\n author = {Webb, Stefan and Golinski, Adam and Zinkov, Rob and N, Siddharth and Rainforth, Tom and Teh, Yee Whye and Wood, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Faithful Inversion of Generative Models for Effective Amortized Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/894b77f805bd94d292574c38c5d628d5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/894b77f805bd94d292574c38c5d628d5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/894b77f805bd94d292574c38c5d628d5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/894b77f805bd94d292574c38c5d628d5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/894b77f805bd94d292574c38c5d628d5-Reviews.html", "metareview": "", "pdf_size": 6067454, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15042488893381793625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Oxford; University of Oxford; UBC; University of Oxford; University of Oxford; University of Oxford; UBC", "aff_domain": "stefanwebb.me; ; ; ; ; ; ", "email": "stefanwebb.me; ; ; ; ; ; ", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/894b77f805bd94d292574c38c5d628d5-Abstract.html", "aff_unique_index": "0;0;1;0;0;0;1", "aff_unique_norm": "University of Oxford;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Oxford;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Fast Approximate Natural Gradient Descent in a Kronecker Factored Eigenbasis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11908", "id": "11908", "author_site": "Thomas George, C\u00e9sar Laurent, Xavier Bouthillier, Nicolas Ballas, Pascal Vincent", "author": "Thomas George; C\u00e9sar Laurent; Xavier Bouthillier; Nicolas Ballas; Pascal Vincent", "abstract": "Optimization algorithms that leverage gradient covariance information, such as variants of natural gradient descent (Amari, 1998), offer the prospect of yielding more effective descent directions. For models with many parameters, the covariance matrix they are based on becomes gigantic, making them inapplicable in their original form. This has motivated research into both simple diagonal approximations and more sophisticated factored approximations such as KFAC (Heskes, 2000; Martens & Grosse, 2015; Grosse & Martens, 2016). In the present work we draw inspiration from both to propose a novel approximation that is provably better than KFAC and amenable to cheap partial updates. It consists in tracking a diagonal variance, not in parameter coordinates, but in a Kronecker-factored eigenbasis, in which the diagonal approximation is likely to be more effective. Experiments show improvements over KFAC in optimization speed for several deep network architectures.", "bibtex": "@inproceedings{NEURIPS2018_48000647,\n author = {George, Thomas and Laurent, C\\'{e}sar and Bouthillier, Xavier and Ballas, Nicolas and Vincent, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Approximate Natural Gradient Descent in a Kronecker Factored Eigenbasis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/48000647b315f6f00f913caa757a70b3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/48000647b315f6f00f913caa757a70b3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/48000647b315f6f00f913caa757a70b3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/48000647b315f6f00f913caa757a70b3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/48000647b315f6f00f913caa757a70b3-Reviews.html", "metareview": "", "pdf_size": 849664, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5230724702304423704&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Mila - Universit\u00e9 de Montr\u00e9al; Mila - Universit\u00e9 de Montr\u00e9al; Mila - Universit\u00e9 de Montr\u00e9al; Facebook AI Research; Mila - Universit\u00e9 de Montr\u00e9al + Facebook AI Research + CIFAR", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/48000647b315f6f00f913caa757a70b3-Abstract.html", "aff_unique_index": "0;0;0;1;0+1+2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Meta;Canadian Institute for Advanced Research", "aff_unique_dep": "Mila;Facebook AI Research;", "aff_unique_url": "https://www.umontreal.ca;https://research.facebook.com;https://www.cifar.ca", "aff_unique_abbr": "UdeM;FAIR;CIFAR", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Montr\u00e9al;", "aff_country_unique_index": "0;0;0;1;0+1+0", "aff_country_unique": "Canada;United States" }, { "title": "Fast Estimation of Causal Interactions using Wold Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11303", "id": "11303", "author_site": "Flavio Figueiredo, Guilherme Resende Borges, Pedro O.S. Vaz de Melo, Renato Assun\u00e7\u00e3o", "author": "Flavio Figueiredo; Guilherme Resende Borges; Pedro O.S. Vaz de Melo; Renato Assun\u00e7\u00e3o", "abstract": "We here focus on the task of learning Granger causality matrices for multivariate point processes. In order to accomplish this task, our work is the first to explore the use of Wold processes. By doing so, we are able to develop asymptotically fast MCMC learning algorithms. With $N$ being the total number of events and $K$ the number of processes, our learning algorithm has an $O(N(\\,\\log(N)\\,+\\,\\log(K)))$ cost per iteration. This is much faster than the $O(N^3\\,K^2)$ or $O(K^3)$ for the state of the art. Our approach, called GrangerBusca, is validated on nine datasets. This is an advance in relation to most prior efforts, which focus mostly on subsets of the Memetracker data. Regarding accuracy, GrangerBusca is three times more accurate (in Precision@10) than the state of the art for the commonly explored subsets of Memetracker. Due to GrangerBusca's much lower training complexity, our approach is the only one able to train models for larger, full sets of data.", "bibtex": "@inproceedings{NEURIPS2018_aff0a6a4,\n author = {Figueiredo, Flavio and Resende Borges, Guilherme and O.S. Vaz de Melo, Pedro and Assun\\c{c}\\~{a}o, Renato},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Estimation of Causal Interactions using Wold Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aff0a6a4521232970b2c1cf539ad0a19-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aff0a6a4521232970b2c1cf539ad0a19-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aff0a6a4521232970b2c1cf539ad0a19-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aff0a6a4521232970b2c1cf539ad0a19-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aff0a6a4521232970b2c1cf539ad0a19-Reviews.html", "metareview": "", "pdf_size": 317026, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3436970798067835046&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Universidade Federal de Minas Gerais (UFMG); Universidade Federal de Minas Gerais (UFMG); Universidade Federal de Minas Gerais (UFMG); Universidade Federal de Minas Gerais (UFMG)", "aff_domain": "dcc.ufmg.br;dcc.ufmg.br;dcc.ufmg.br;dcc.ufmg.br", "email": "dcc.ufmg.br;dcc.ufmg.br;dcc.ufmg.br;dcc.ufmg.br", "github": "http://github.com/flaviovdf/granger-busca", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aff0a6a4521232970b2c1cf539ad0a19-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Universidade Federal de Minas Gerais", "aff_unique_dep": "", "aff_unique_url": "https://www.ufmg.br", "aff_unique_abbr": "UFMG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Brazil" }, { "title": "Fast Greedy MAP Inference for Determinantal Point Process to Improve Recommendation Diversity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11548", "id": "11548", "author_site": "Laming Chen, Guoxin Zhang, Eric Zhou", "author": "Laming Chen; Guoxin Zhang; Eric Zhou", "abstract": "The determinantal point process (DPP) is an elegant probabilistic model of repulsion with applications in various machine learning tasks including summarization and search. However, the maximum a posteriori (MAP) inference for DPP, which plays an important role in many applications, is NP-hard, and even the popular greedy algorithm can still be too computationally expensive to be used in large-scale real-time scenarios. To overcome the computational challenge, in this paper, we propose a novel algorithm to greatly accelerate the greedy MAP inference for DPP. In addition, our algorithm also adapts to scenarios where the repulsion is only required among a few nearby items in the result sequence. We apply the proposed algorithm to generate relevant and diverse recommendations. Experimental results show that our proposed algorithm is significantly faster than state-of-the-art competitors, and provides a better relevance-diversity trade-off on several public datasets, which is also confirmed in an online A/B test.", "bibtex": "@inproceedings{NEURIPS2018_dbbf603f,\n author = {Chen, Laming and Zhang, Guoxin and Zhou, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Greedy MAP Inference for Determinantal Point Process to Improve Recommendation Diversity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dbbf603ff0e99629dda5d75b6f75f966-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dbbf603ff0e99629dda5d75b6f75f966-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dbbf603ff0e99629dda5d75b6f75f966-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dbbf603ff0e99629dda5d75b6f75f966-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dbbf603ff0e99629dda5d75b6f75f966-Reviews.html", "metareview": "", "pdf_size": 646354, "gs_citation": 335, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18302515766633517851&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Hulu LLC, Beijing, China; Kwai Inc., Beijing, China + Hulu LLC, Beijing, China; Hulu LLC, Beijing, China", "aff_domain": "hulu.com;kuaishou.com;gmail.com", "email": "hulu.com;kuaishou.com;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dbbf603ff0e99629dda5d75b6f75f966-Abstract.html", "aff_unique_index": "0;1+0;0", "aff_unique_norm": "Hulu LLC;Kwai Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.hulu.com;https://www.kwai.com", "aff_unique_abbr": "Hulu;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0+0;0", "aff_country_unique": "China" }, { "title": "Fast Rates of ERM and Stochastic Approximation: Adaptive to Error Bound Conditions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11460", "id": "11460", "author_site": "Mingrui Liu, Xiaoxuan Zhang, Lijun Zhang, Rong Jin, Tianbao Yang", "author": "Mingrui Liu; Xiaoxuan Zhang; Lijun Zhang; Rong Jin; Tianbao Yang", "abstract": "Error bound conditions (EBC) are properties that characterize the growth of an objective function when a point is moved away from the optimal set. They have recently received increasing attention in the field of optimization for developing optimization algorithms with fast convergence. However, studies of EBC in statistical learning have hitherto been limited. The main contributions of this paper are two-fold. First, we develop fast and intermediate rates of empirical risk minimization (ERM) under EBC for risk minimization with Lipschitz-continuous and smooth convex random functions. Second, we establish fast and intermediate rates of an efficient stochastic approximation (SA) algorithm for risk minimization with Lipschitz-continuous random functions, which requires only one pass of $n$ samples and adapts to EBC. For both approaches, the convergence rates span a full spectrum between $\\widetilde O(1/\\sqrt{n})$ and $\\widetilde O(1/n)$ depending on the power constant in EBC, and could be even faster than $O(1/n)$ in special cases for ERM. Moreover, these convergence rates are automatically adaptive without using any knowledge of EBC. 
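To fix notation for the rates quoted above, one common way of writing an error bound condition and the induced rate interpolation is sketched below; the exponent convention and constants here are illustrative assumptions, not the paper's exact statement.

```latex
% Illustrative EBC for a risk F with optimal set W_* and optimal value F_*;
% the exponent convention (theta in [0, 1]) is an assumed, common parameterization.
\[
  \operatorname{dist}(w, W_*) \le c\,\bigl(F(w) - F_*\bigr)^{\theta}
  \quad\Longrightarrow\quad
  F(\hat{w}) - F_* = \widetilde{O}\!\bigl(n^{-\frac{1}{2-\theta}}\bigr),
\]
% which interpolates between O~(1/sqrt(n)) at theta = 0 and O~(1/n) at theta = 1.
```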
Overall, this work not only strengthens the understanding of ERM for statistical learning but also brings new fast stochastic algorithms for solving a broad range of statistical learning problems.", "bibtex": "@inproceedings{NEURIPS2018_716e1b8c,\n author = {Liu, Mingrui and Zhang, Xiaoxuan and Zhang, Lijun and Jin, Rong and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Rates of ERM and Stochastic Approximation: Adaptive to Error Bound Conditions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/716e1b8c6cd17b771da77391355749f3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/716e1b8c6cd17b771da77391355749f3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/716e1b8c6cd17b771da77391355749f3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/716e1b8c6cd17b771da77391355749f3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/716e1b8c6cd17b771da77391355749f3-Reviews.html", "metareview": "", "pdf_size": 410274, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8468424506709365275&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA; National Key Laboratory for Novel Software Technology, Nanjing University, China; Machine Intelligence Technology, Alibaba Group, Bellevue, WA 98004, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA", "aff_domain": "uiowa.edu; ;gmail.com; ;uiowa.edu", "email": "uiowa.edu; ;gmail.com; ;uiowa.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/716e1b8c6cd17b771da77391355749f3-Abstract.html", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Iowa;Nanjing University;Alibaba Group", "aff_unique_dep": "Department of Computer Science;National Key Laboratory for Novel Software Technology;Machine Intelligence Technology", "aff_unique_url": "https://www.uiowa.edu;http://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "UIowa;Nanjing U;Alibaba", "aff_campus_unique_index": "0;0;2;0", "aff_campus_unique": "Iowa City;;Bellevue", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Fast Similarity Search via Optimal Sparse Lifting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11044", "id": "11044", "author_site": "Wenye Li, Jingwei Mao, Yin Zhang, Shuguang Cui", "author": "Wenye Li; Jingwei Mao; Yin Zhang; Shuguang Cui", "abstract": "Similarity search is a fundamental problem in computing science with various applications and has attracted significant research attention, especially for large-scale search in high dimensions. Motivated by evidence from biological science, our work develops a novel approach for similarity search. 
Fundamentally different from existing methods that typically reduce the dimension of the data to lessen the computational complexity and speed up the search, our approach projects the data into an even higher-dimensional space while ensuring the sparsity of the data in the output space, with the objective of further improving precision and speed. Specifically, our approach has two key steps. Firstly, it computes the optimal sparse lifting for given input samples and increases the dimension of the data while approximately preserving their pairwise similarity. Secondly, it seeks the optimal lifting operator that best maps input samples to the optimal sparse lifting. Computationally, both steps are modeled as optimization problems that can be efficiently and effectively solved by the Frank-Wolfe algorithm. Simple as it is, our approach has shown significantly improved results in empirical evaluations and exhibits high potential for solving practical problems.", "bibtex": "@inproceedings{NEURIPS2018_a8baa565,\n author = {Li, Wenye and Mao, Jingwei and Zhang, Yin and Cui, Shuguang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Similarity Search via Optimal Sparse Lifting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a8baa56554f96369ab93e4f3bb068c22-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a8baa56554f96369ab93e4f3bb068c22-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a8baa56554f96369ab93e4f3bb068c22-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a8baa56554f96369ab93e4f3bb068c22-Reviews.html", "metareview": "", "pdf_size": 411807, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1732198875391698446&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "The Chinese University of Hong Kong, Shenzhen, China+Shenzhen Research Institute of Big Data, Shenzhen, China; The Chinese University of Hong Kong, Shenzhen, China; The Chinese University of Hong Kong, Shenzhen, China; The Chinese University of Hong Kong, Shenzhen, China+Shenzhen Research Institute of Big Data, Shenzhen, China", "aff_domain": "cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "email": "cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;cuhk.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a8baa56554f96369ab93e4f3bb068c22-Abstract.html", "aff_unique_index": "0+1;0;0;0+1", "aff_unique_norm": "Chinese University of Hong Kong;Shenzhen Research Institute of Big Data", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.cn;http://www.sribd.cn", "aff_unique_abbr": "CUHK;", "aff_campus_unique_index": "0+0;0;0;0+0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0+0;0;0;0+0", "aff_country_unique": "China" }, { "title": "Fast and Effective Robustness Certification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12021", "id": "12021", "author_site": "Gagandeep Singh, Timon Gehr, Matthew Mirman, Markus P\u00fcschel, Martin Vechev", "author": "Gagandeep Singh; Timon Gehr; Matthew Mirman; Markus P\u00fcschel; Martin Vechev", "abstract": "We present a new method and system, called DeepZ, for certifying neural 
network robustness based on abstract interpretation. Compared to state-of-the-art automated verifiers for neural networks, DeepZ: (i) handles ReLU, Tanh and Sigmoid activation functions, (ii) supports feedforward and convolutional architectures, (iii) is significantly more scalable and precise, and (iv) is sound with respect to floating point arithmetic. These benefits are due to carefully designed approximations tailored to the setting of neural networks. As an example, DeepZ achieves a verification accuracy of 97% on a large network with 88,500 hidden units under an $L_{\\infty}$ attack with $\\epsilon = 0.1$, with an average runtime of 133 seconds.", "bibtex": "@inproceedings{NEURIPS2018_f2f44698,\n author = {Singh, Gagandeep and Gehr, Timon and Mirman, Matthew and P\\\"{u}schel, Markus and Vechev, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast and Effective Robustness Certification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f2f446980d8e971ef3da97af089481c3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f2f446980d8e971ef3da97af089481c3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f2f446980d8e971ef3da97af089481c3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f2f446980d8e971ef3da97af089481c3-Reviews.html", "metareview": "", "pdf_size": 285654, "gs_citation": 674, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14718418488361488090&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, ETH Zurich, Switzerland; Department of Computer Science, ETH Zurich, Switzerland; Department of Computer Science, ETH Zurich, Switzerland; Department of Computer Science, ETH Zurich, Switzerland; Department of Computer Science, ETH Zurich, Switzerland", "aff_domain": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f2f446980d8e971ef3da97af089481c3-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Fast deep reinforcement learning using online adjustments from the past", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11999", "id": "11999", "author_site": "Steven Hansen, Alexander Pritzel, Pablo Sprechmann, Andre Barreto, Charles Blundell", "author": "Steven Hansen; Alexander Pritzel; Pablo Sprechmann; Andre Barreto; Charles Blundell", "abstract": "We propose Ephemeral Value Adjustments (EVA): a means of allowing deep reinforcement learning agents to rapidly adapt to experience in their replay buffer. EVA shifts the value predicted by a neural network with an estimate of the value function found by prioritised sweeping over experience tuples from the replay buffer near the current state. 
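To make the value-shifting mechanism concrete, here is a minimal NumPy sketch under simplifying assumptions: the non-parametric estimate is a k-nearest-neighbour average over precomputed returns, standing in for EVA's prioritised sweeping over the replay buffer, and `lam` is an assumed mixing weight rather than a value taken from the paper.

```python
import numpy as np

def nonparametric_value(query, states, returns, k=5):
    """Estimate a value from the k replay experiences nearest to `query`.
    `states` is an (N, d) array of stored states and `returns` their
    backed-up returns (assumed precomputed here; EVA itself plans over
    the buffer with prioritised sweeping)."""
    dists = np.linalg.norm(states - query, axis=1)
    idx = np.argsort(dists)[:k]
    return returns[idx].mean()

def eva_value(q_theta, query, states, returns, lam=0.5):
    """Blend the network's prediction with the replay-based estimate,
    a sketch of EVA's ephemeral value shifting."""
    return lam * q_theta + (1.0 - lam) * nonparametric_value(query, states, returns)

# Toy usage with random data.
rng = np.random.default_rng(0)
states, returns = rng.normal(size=(100, 4)), rng.normal(size=100)
print(eva_value(q_theta=0.3, query=rng.normal(size=4), states=states, returns=returns))
```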
EVA combines a number of recent ideas on integrating episodic memory-like structures into reinforcement learning agents: slot-based storage, content-based retrieval, and memory-based planning. We show that EVA is performant on a demonstration task and Atari games.", "bibtex": "@inproceedings{NEURIPS2018_f7bdb0e1,\n author = {Hansen, Steven and Pritzel, Alexander and Sprechmann, Pablo and Barreto, Andre and Blundell, Charles},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast deep reinforcement learning using online adjustments from the past},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f7bdb0e100275600f9e183e25d81822d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f7bdb0e100275600f9e183e25d81822d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f7bdb0e100275600f9e183e25d81822d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f7bdb0e100275600f9e183e25d81822d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f7bdb0e100275600f9e183e25d81822d-Reviews.html", "metareview": "", "pdf_size": 482681, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18117366302726355986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "DeepMind; DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f7bdb0e100275600f9e183e25d81822d-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Fast greedy algorithms for dictionary selection with generalized sparsity constraints", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11466", "id": "11466", "author_site": "Kaito Fujii, Tasuku Soma", "author": "Kaito Fujii; Tasuku Soma", "abstract": "In dictionary selection, several atoms are selected from a finite set of candidates that successfully approximate given data points in the sparse representation. We propose a novel efficient greedy algorithm for dictionary selection. Not only does our algorithm work much faster than the known methods, but it can also handle more complex sparsity constraints, such as average sparsity. Using numerical experiments, we show that our algorithm outperforms the known methods for dictionary selection, achieving performance competitive with dictionary learning algorithms in a smaller running time.", "bibtex": "@inproceedings{NEURIPS2018_e069ea4c,\n author = {Fujii, Kaito and Soma, Tasuku},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast greedy algorithms for dictionary selection with generalized sparsity constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e069ea4c9c233d36ff9c7f329bc08ff1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e069ea4c9c233d36ff9c7f329bc08ff1-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e069ea4c9c233d36ff9c7f329bc08ff1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e069ea4c9c233d36ff9c7f329bc08ff1-Reviews.html", "metareview": "", "pdf_size": 557607, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13114279535984561194&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Graduate School of Information Sciences and Technology, The University of Tokyo; Graduate School of Information Sciences and Technology, The University of Tokyo", "aff_domain": "mist.i.u-tokyo.ac.jp;mist.i.u-tokyo.ac.jp", "email": "mist.i.u-tokyo.ac.jp;mist.i.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e069ea4c9c233d36ff9c7f329bc08ff1-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "Graduate School of Information Sciences and Technology", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Tokyo", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "FastGRNN: A Fast, Accurate, Stable and Tiny Kilobyte Sized Gated Recurrent Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11860", "id": "11860", "author_site": "Aditya Kusupati, Manish Singh, Kush Bhatia, Ashish Kumar, Prateek Jain, Manik Varma", "author": "Aditya Kusupati; Manish Singh; Kush Bhatia; Ashish Kumar; Prateek Jain; Manik Varma", "abstract": "This paper develops the FastRNN and FastGRNN algorithms to address the twin RNN limitations of inaccurate training and inefficient prediction. Previous approaches have improved accuracy at the expense of prediction costs making them infeasible for resource-constrained and real-time applications. Unitary RNNs have increased accuracy somewhat by restricting the range of the state transition matrix's singular values but have also increased the model size as they require a larger number of hidden units to make up for the loss in expressive power. Gated RNNs have obtained state-of-the-art accuracies by adding extra parameters thereby resulting in even larger models. FastRNN addresses these limitations by adding a residual connection that does not constrain the range of the singular values explicitly and has only two extra scalar parameters. FastGRNN then extends the residual connection to a gate by reusing the RNN matrices to match state-of-the-art gated RNN accuracies but with a 2-4x smaller model. Enforcing FastGRNN's matrices to be low-rank, sparse and quantized resulted in accurate models that could be up to 35x smaller than leading gated and unitary RNNs. This allowed FastGRNN to accurately recognize the \"Hey Cortana\" wakeword with a 1 KB model and to be deployed on severely resource-constrained IoT microcontrollers too tiny to store other RNN models. 
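The gating mechanism described above can be sketched in a few lines of NumPy. The update equations follow the FastGRNN formulation (gate and candidate reusing the same W and U); the sigmoid parameterization of the two scalars and all sizes are illustrative assumptions, and the low-rank/sparse/quantized compression steps are omitted.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def fastgrnn_cell(x_t, h_prev, W, U, b_z, b_h, zeta=1.0, nu=-4.0):
    """One FastGRNN step: the gate z and candidate h_tilde reuse the same
    W and U (this matrix reuse is what keeps the model small); zeta and nu
    are the two extra scalar parameters, squashed through a sigmoid here
    as one common parameterization."""
    pre = W @ x_t + U @ h_prev                # shared pre-activation
    z = sigmoid(pre + b_z)                    # gate
    h_tilde = np.tanh(pre + b_h)              # candidate state
    return (sigmoid(zeta) * (1.0 - z) + sigmoid(nu)) * h_tilde + z * h_prev

# Toy usage: hidden size 8, input size 4, a length-5 input sequence.
rng = np.random.default_rng(0)
W, U = rng.normal(size=(8, 4)) * 0.1, rng.normal(size=(8, 8)) * 0.1
h = np.zeros(8)
for x in rng.normal(size=(5, 4)):
    h = fastgrnn_cell(x, h, W, U, b_z=np.zeros(8), b_h=np.zeros(8))
print(h.shape)  # (8,)
```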
FastGRNN's code is available at https://github.com/Microsoft/EdgeML/.", "bibtex": "@inproceedings{NEURIPS2018_ab013ca6,\n author = {Kusupati, Aditya and Singh, Manish and Bhatia, Kush and Kumar, Ashish and Jain, Prateek and Varma, Manik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {FastGRNN: A Fast, Accurate, Stable and Tiny Kilobyte Sized Gated Recurrent Neural Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ab013ca67cf2d50796b0c11d1b8bc95d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ab013ca67cf2d50796b0c11d1b8bc95d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ab013ca67cf2d50796b0c11d1b8bc95d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ab013ca67cf2d50796b0c11d1b8bc95d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ab013ca67cf2d50796b0c11d1b8bc95d-Reviews.html", "metareview": "", "pdf_size": 494010, "gs_citation": 249, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14286601091173970187&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 14, "aff": "Microsoft Research India; Indian Institute of Technology Delhi; University of California Berkeley + Microsoft Research India; University of California Berkeley; Microsoft Research India; Microsoft Research India", "aff_domain": "microsoft.com;gmail.com;cs.berkeley.edu;berkeley.edu;microsoft.com;microsoft.com", "email": "microsoft.com;gmail.com;cs.berkeley.edu;berkeley.edu;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ab013ca67cf2d50796b0c11d1b8bc95d-Abstract.html", "aff_unique_index": "0;1;2+0;2;0;0", "aff_unique_norm": "Microsoft;Indian Institute of Technology Delhi;University of California, Berkeley", "aff_unique_dep": "Microsoft Research India;;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/microsoft-research-india;https://www.iitd.ac.in;https://www.berkeley.edu", "aff_unique_abbr": "MSR India;IIT Delhi;UC Berkeley", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Delhi;Berkeley", "aff_country_unique_index": "0;0;1+0;1;0;0", "aff_country_unique": "India;United States" }, { "title": "Faster Neural Networks Straight from JPEG", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11391", "id": "11391", "author_site": "Lionel Gueguen, Alex Sergeev, Ben Kadlec, Rosanne Liu, Jason Yosinski", "author": "Lionel Gueguen; Alex Sergeev; Ben Kadlec; Rosanne Liu; Jason Yosinski", "abstract": "The simple, elegant approach of training convolutional neural networks (CNNs) directly from RGB pixels has enjoyed overwhelming empirical success. But can more performance be squeezed out of networks by using different input representations? In this paper we propose and explore a simple idea: train CNNs directly on the blockwise discrete cosine transform (DCT) coefficients computed and available in the middle of the JPEG codec. 
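As a rough, hedged illustration of this input representation (not the authors' pipeline, which modifies libjpeg to read the coefficients straight from the codec), the following sketch computes JPEG-style 8x8 blockwise DCT coefficients from a decoded grayscale image with SciPy:

```python
import numpy as np
from scipy.fft import dctn

def blockwise_dct(img, block=8):
    """Rearrange a grayscale image into JPEG-style 8x8 DCT coefficients,
    returning an (H//8, W//8, 64) tensor usable as CNN input. This only
    mimics the representation; the paper pulls coefficients directly from
    the JPEG codec instead of recomputing them."""
    h, w = img.shape
    h, w = h - h % block, w - w % block       # crop to a multiple of the block size
    img = img[:h, :w].astype(np.float64)
    out = np.empty((h // block, w // block, block * block))
    for i in range(0, h, block):
        for j in range(0, w, block):
            coeffs = dctn(img[i:i + block, j:j + block], norm="ortho")
            out[i // block, j // block] = coeffs.ravel()
    return out

print(blockwise_dct(np.random.rand(64, 64)).shape)  # (8, 8, 64)
```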
Intuitively, when processing JPEG images using CNNs, it seems unnecessary to decompress a blockwise frequency representation to an expanded pixel representation, shuffle it from CPU to GPU, and then process it with a CNN that will learn something similar to a transform back to frequency representation in its first layers. Why not skip both steps and feed the frequency domain into the network directly? In this paper we modify libjpeg to produce DCT coefficients directly, modify a ResNet-50 network to accommodate the differently sized and strided input, and evaluate performance on ImageNet. We find networks that are both faster and more accurate, as well as networks with about the same accuracy but 1.77x faster than ResNet-50.", "bibtex": "@inproceedings{NEURIPS2018_7af6266c,\n author = {Gueguen, Lionel and Sergeev, Alex and Kadlec, Ben and Liu, Rosanne and Yosinski, Jason},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Faster Neural Networks Straight from JPEG},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7af6266cc52234b5aa339b16695f7fc4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7af6266cc52234b5aa339b16695f7fc4-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7af6266cc52234b5aa339b16695f7fc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7af6266cc52234b5aa339b16695f7fc4-Reviews.html", "metareview": "", "pdf_size": 508176, "gs_citation": 287, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9617446820670115100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Uber; Uber; Uber; Uber AI Labs; Uber AI Labs", "aff_domain": "uber.com;uber.com;uber.com;uber.com;uber.com", "email": "uber.com;uber.com;uber.com;uber.com;uber.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7af6266cc52234b5aa339b16695f7fc4-Abstract.html", "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Uber Technologies Inc.;Uber", "aff_unique_dep": ";Uber AI Labs", "aff_unique_url": "https://www.uber.com;https://www.uber.com", "aff_unique_abbr": "Uber;Uber AI Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Faster Online Learning of Optimal Threshold for Consistent F-measure Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11387", "id": "11387", "author_site": "Xiaoxuan Zhang, Mingrui Liu, Xun Zhou, Tianbao Yang", "author": "Xiaoxuan Zhang; Mingrui Liu; Xun Zhou; Tianbao Yang", "abstract": "In this paper, we consider online F-measure optimization (OFO). Unlike traditional performance metrics (e.g., classification error rate), F-measure is non-decomposable over training examples and is a non-convex function of model parameters, making it much more difficult to optimize in an online fashion. Most existing results on OFO suffer from high memory/computational costs and/or lack a statistical consistency guarantee for optimizing F-measure at the population level. 
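For background, the population F-measure of a thresholded posterior and the classical fact motivating threshold learning can be written as follows; this is a standard formulation from the F-measure literature, stated here as an aid rather than quoted from the paper.

```latex
% With eta(x) = P(Y = 1 | X = x) and predictions 1{eta(X) >= tau}:
\[
  F(\tau) \;=\; \frac{2\,\mathbb{E}\!\left[\eta(X)\,\mathbf{1}\{\eta(X)\ge\tau\}\right]}
                     {\mathbb{E}[\eta(X)] \;+\; \Pr\bigl(\eta(X)\ge\tau\bigr)}.
\]
% A known consistency result states that thresholding eta at the optimal tau*
% attains the optimal F-measure F*, with tau* = F*/2; this is why jointly
% estimating eta online and learning a single scalar threshold can suffice.
```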
To advance OFO, we propose an efficient online algorithm based on simultaneously learning a posterior probability of class and learning an optimal threshold by minimizing a stochastic strongly convex function with unknown strong convexity parameter. A key component of the proposed method is a novel stochastic algorithm with low memory and computational costs, which can enjoy a convergence rate of $\\widetilde O(1/\\sqrt{n})$ for learning the optimal threshold under a mild condition on the convergence of the posterior probability, where $n$ is the number of processed examples. It is provably faster than its predecessor based on a heuristic for updating the threshold. The experiments verify the efficiency of the proposed algorithm in comparison with state-of-the-art OFO algorithms.", "bibtex": "@inproceedings{NEURIPS2018_65fc52ed,\n author = {Zhang, Xiaoxuan and Liu, Mingrui and Zhou, Xun and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Faster Online Learning of Optimal Threshold for Consistent F-measure Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/65fc52ed8f88c81323a418ca94cec2ed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/65fc52ed8f88c81323a418ca94cec2ed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/65fc52ed8f88c81323a418ca94cec2ed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/65fc52ed8f88c81323a418ca94cec2ed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/65fc52ed8f88c81323a418ca94cec2ed-Reviews.html", "metareview": "", "pdf_size": 576114, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8274525115465410469&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA+Department of Management Sciences, The University of Iowa, Iowa City, IA 52242, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA+Department of Management Sciences, The University of Iowa, Iowa City, IA 52242, USA; Department of Management Sciences, The University of Iowa, Iowa City, IA 52242, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52242, USA", "aff_domain": "uiowa.edu;uiowa.edu; ; ", "email": "uiowa.edu;uiowa.edu; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/65fc52ed8f88c81323a418ca94cec2ed-Abstract.html", "aff_unique_index": "0+0;0+0;0;0", "aff_unique_norm": "University of Iowa", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uiowa.edu", "aff_unique_abbr": "UIowa", "aff_campus_unique_index": "0+0;0+0;0;0", "aff_campus_unique": "Iowa City", "aff_country_unique_index": "0+0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Fighting Boredom in Recommender Systems with Linear Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11189", "id": "11189", "author_site": "Romain WARLOP, Alessandro Lazaric, J\u00e9r\u00e9mie Mary", "author": "Romain WARLOP; Alessandro Lazaric; J\u00e9r\u00e9mie Mary", "abstract": "A common assumption in recommender systems (RS) is the existence of a best 
fixed recommendation strategy. Such a strategy may be simple and work at the item level (e.g., in multi-armed bandits it is assumed one best fixed arm/item exists) or implement a more sophisticated RS (e.g., the objective of A/B testing is to find the best fixed RS and execute it thereafter). We argue that this assumption is rarely verified in practice, as the recommendation process itself may impact the user\u2019s preferences. For instance, a user may get bored by a strategy, while she may gain interest again if enough time has passed since the last time that strategy was used. In this case, a better approach consists in alternating different solutions at the right frequency to fully exploit their potential. In this paper, we first cast the problem as a Markov decision process, where the rewards are a linear function of the recent history of actions, and we show that a policy considering the long-term influence of the recommendations may outperform both fixed-action and contextual greedy policies. We then introduce an extension of the UCRL algorithm (LinUCRL) to effectively balance exploration and exploitation in an unknown environment, and we derive a regret bound that is independent of the number of states. Finally, we empirically validate the model assumptions and the algorithm in a number of realistic scenarios.", "bibtex": "@inproceedings{NEURIPS2018_210f760a,\n author = {WARLOP, Romain and Lazaric, Alessandro and Mary, J\\'{e}r\\'{e}mie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fighting Boredom in Recommender Systems with Linear Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/210f760a89db30aa72ca258a3483cc7f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/210f760a89db30aa72ca258a3483cc7f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/210f760a89db30aa72ca258a3483cc7f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/210f760a89db30aa72ca258a3483cc7f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/210f760a89db30aa72ca258a3483cc7f-Reviews.html", "metareview": "", "pdf_size": 543676, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15885221111907708925&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "fifty-five, Paris, France+SequeL Team, Inria Lille, France; Facebook AI Research, Paris, France; Criteo AI Lab, Paris, France", "aff_domain": "fifty-five.com;fb.com;criteo.com", "email": "fifty-five.com;fb.com;criteo.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/210f760a89db30aa72ca258a3483cc7f-Abstract.html", "aff_unique_index": "0+1;2;3", "aff_unique_norm": "fifty-five;INRIA Lille;Meta;Criteo", "aff_unique_dep": ";SequeL Team;Facebook AI Research;Criteo AI Lab", "aff_unique_url": ";https://www.inria.fr/en;https://research.facebook.com;https://www.criteo.com", "aff_unique_abbr": ";;FAIR;Criteo", "aff_campus_unique_index": "1;2;2", "aff_campus_unique": ";Lille;Paris", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "France" }, { "title": "First-order Stochastic Algorithms for Escaping From Saddle Points in Almost Linear Time", "status": "Poster", "track": "main", 
"site": "https://nips.cc/virtual/2018/poster/11540", "id": "11540", "author_site": "Yi Xu, Rong Jin, Tianbao Yang", "author": "Yi Xu; Rong Jin; Tianbao Yang", "abstract": "(This is a theory paper) In this paper, we consider first-order methods for solving stochastic non-convex optimization problems. The key building block of the proposed algorithms is first-order procedures to extract negative curvature from the Hessian matrix through a principled sequence starting from noise, which are referred to {\\it NEgative-curvature-Originated-from-Noise or NEON} and are of independent interest. Based on this building block, we design purely first-order stochastic algorithms for escaping from non-degenerate saddle points with a much better time complexity (almost linear time in the problem's dimensionality). In particular, we develop a general framework of {\\it first-order stochastic algorithms} with a second-order convergence guarantee based on our new technique and existing algorithms that may only converge to a first-order stationary point. For finding a nearly {\\it second-order stationary point} $\\x$ such that $\\|\\nabla F(\\x)\\|\\leq \\epsilon$ and $\\nabla^2 F(\\x)\\geq -\\sqrt{\\epsilon}I$ (in high probability), the best time complexity of the presented algorithms is $\\widetilde O(d/\\epsilon^{3.5})$, where $F(\\cdot)$ denotes the objective function and $d$ is the dimensionality of the problem. To the best of our knowledge, this is the first theoretical result of first-order stochastic algorithms with an almost linear time in terms of problem's dimensionality for finding second-order stationary points, which is even competitive with existing stochastic algorithms hinging on the second-order information.", "bibtex": "@inproceedings{NEURIPS2018_217e342f,\n author = {Xu, Yi and Jin, Rong and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {First-order Stochastic Algorithms for Escaping From Saddle Points in Almost Linear Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/217e342fc01668b10cb1188d40d3370e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/217e342fc01668b10cb1188d40d3370e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/217e342fc01668b10cb1188d40d3370e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/217e342fc01668b10cb1188d40d3370e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/217e342fc01668b10cb1188d40d3370e-Reviews.html", "metareview": "", "pdf_size": 426116, "gs_citation": 145, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2234750812857495141&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, The University of Iowa, Iowa City, IA 52246, USA; Machine Intelligence Technology, Alibaba Group, Bellevue, WA 98004, USA; Department of Computer Science, The University of Iowa, Iowa City, IA 52246, USA", "aff_domain": "uiowa.edu;alibaba-inc.com;uiowa.edu", "email": "uiowa.edu;alibaba-inc.com;uiowa.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/217e342fc01668b10cb1188d40d3370e-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Iowa;Alibaba Group", "aff_unique_dep": "Department of Computer Science;Machine Intelligence Technology", "aff_unique_url": "https://www.uiowa.edu;https://www.alibaba.com", "aff_unique_abbr": "UIowa;Alibaba", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Iowa City;Bellevue", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "FishNet: A Versatile Backbone for Image, Region, and Pixel Level Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11098", "id": "11098", "author_site": "Shuyang Sun, Jiangmiao Pang, Jianping Shi, Shuai Yi, Wanli Ouyang", "author": "Shuyang Sun; Jiangmiao Pang; Jianping Shi; Shuai Yi; Wanli Ouyang", "abstract": "The basic principles in designing convolutional neural network (CNN) structures for predicting objects at different levels, e.g., image level, region level, and pixel level, are diverging. Generally, network structures designed specifically for image classification are used directly as the default backbone for other tasks, including detection and segmentation, but backbones are seldom designed to unify the advantages of networks intended for pixel-level or region-level prediction tasks, which may require very deep features at high resolution. Towards this goal, we design a fish-like network, called FishNet. In FishNet, information at all resolutions is preserved and refined for the final task. Besides, we observe that existing works still cannot \\emph{directly} propagate the gradient information from deep layers to shallow layers. Our design can better handle this problem. Extensive experiments have been conducted to demonstrate the remarkable performance of FishNet. In particular, on ImageNet-1k, FishNet surpasses the accuracy of DenseNet and ResNet with fewer parameters. FishNet was applied as one of the modules in the winning entry of the COCO Detection 2018 challenge. 
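As a loose, hypothetical illustration of the design principle (features from every resolution are kept and refined, so gradients can reach shallow layers directly), here is a toy PyTorch module; it is a schematic stand-in and not the FishNet architecture.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class FishLikeToy(nn.Module):
    """Toy tail/body/head network: downsample, upsample while concatenating
    earlier-resolution features, then downsample again, so no resolution's
    features are discarded. Schematic only; not the paper's design."""
    def __init__(self, c=16):
        super().__init__()
        self.tail1 = nn.Conv2d(3, c, 3, stride=2, padding=1)      # to 1/2 res
        self.tail2 = nn.Conv2d(c, c, 3, stride=2, padding=1)      # to 1/4 res
        self.body = nn.Conv2d(2 * c, c, 3, padding=1)             # refine at 1/2 res
        self.head = nn.Conv2d(2 * c, c, 3, stride=2, padding=1)   # back to 1/4 res
        self.fc = nn.Linear(2 * c, 10)

    def forward(self, x):
        t1 = F.relu(self.tail1(x))
        t2 = F.relu(self.tail2(t1))
        up = F.interpolate(t2, scale_factor=2, mode="nearest")
        b = F.relu(self.body(torch.cat([up, t1], dim=1)))          # keep 1/2-res features
        h = F.relu(self.head(torch.cat([b, t1], dim=1)))           # keep them again
        pooled = torch.cat([h, t2], dim=1).mean(dim=(2, 3))        # keep deepest features too
        return self.fc(pooled)

print(FishLikeToy()(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10])
```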
The code is available at https://github.com/kevin-ssy/FishNet.", "bibtex": "@inproceedings{NEURIPS2018_75fc093c,\n author = {Sun, Shuyang and Pang, Jiangmiao and Shi, Jianping and Yi, Shuai and Ouyang, Wanli},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {FishNet: A Versatile Backbone for Image, Region, and Pixel Level Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/75fc093c0ee742f6dddaa13fff98f104-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/75fc093c0ee742f6dddaa13fff98f104-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/75fc093c0ee742f6dddaa13fff98f104-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/75fc093c0ee742f6dddaa13fff98f104-Reviews.html", "metareview": "", "pdf_size": 1675522, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8077266557125333363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The University of Sydney; Zhejiang University; SenseTime Research; SenseTime Research; The University of Sydney", "aff_domain": "sydney.edu.au; ; ; ; ", "email": "sydney.edu.au; ; ; ; ", "github": "https://github.com/kevin-ssy/FishNet", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/75fc093c0ee742f6dddaa13fff98f104-Abstract.html", "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of Sydney;Zhejiang University;SenseTime", "aff_unique_dep": ";;SenseTime Research", "aff_unique_url": "https://www.sydney.edu.au;https://www.zju.edu.cn;https://www.sensetime.com", "aff_unique_abbr": "USYD;ZJU;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "Australia;China" }, { "title": "Flexible and accurate inference and learning for deep generative models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11413", "id": "11413", "author_site": "Eszter V\u00e9rtes, Maneesh Sahani", "author": "Eszter V\u00e9rtes; Maneesh Sahani", "abstract": "We introduce a new approach to learning in hierarchical latent-variable generative models called the \u201cdistributed distributional code Helmholtz machine\u201d, which emphasises flexibility and accuracy in the inferential process. Like the original Helmholtz machine and later variational autoencoder algorithms (but unlike adversarial methods) our approach learns an explicit inference or \u201crecognition\u201d model to approximate the posterior distribution over the latent variables. Unlike these earlier methods, it employs a posterior representation that is not limited to a narrow tractable parametrised form (nor is it represented by samples). To train the generative and recognition models we develop an extended wake-sleep algorithm inspired by the original Helmholtz machine. This makes it possible to learn hierarchical latent models with both discrete and continuous variables, where an accurate posterior representation is essential. We demonstrate that the new algorithm outperforms current state-of-the-art methods on synthetic, natural image patch and the MNIST data sets.", "bibtex": "@inproceedings{NEURIPS2018_955cb567,\n author = {V\\'{e}rtes, Eszter and Sahani, Maneesh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Flexible and accurate inference and learning for deep generative models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/955cb567b6e38f4c6b3f28cc857fc38c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/955cb567b6e38f4c6b3f28cc857fc38c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/955cb567b6e38f4c6b3f28cc857fc38c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/955cb567b6e38f4c6b3f28cc857fc38c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/955cb567b6e38f4c6b3f28cc857fc38c-Reviews.html", "metareview": "", "pdf_size": 1010246, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11148662042285016668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/955cb567b6e38f4c6b3f28cc857fc38c-Abstract.html" }, { "title": "Flexible neural representation for physics prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11840", "id": "11840", "author_site": "Damian Mrowca, Chengxu Zhuang, Elias Wang, Nick Haber, Li Fei-Fei, Josh Tenenbaum, Daniel Yamins", "author": "Damian Mrowca; Chengxu Zhuang; Elias Wang; Nick Haber; Li F Fei-Fei; Josh Tenenbaum; Daniel L Yamins", "abstract": "Humans have a remarkable capacity to understand the physical dynamics of objects in their environment, flexibly capturing complex structures and interactions at multiple levels of detail.", "bibtex": "@inproceedings{NEURIPS2018_fd9dd764,\n author = {Mrowca, Damian and Zhuang, Chengxu and Wang, Elias and Haber, Nick and Fei-Fei, Li F and Tenenbaum, Josh and Yamins, Daniel L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Flexible neural representation for physics prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fd9dd764a6f1d73f4340d570804eacc4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fd9dd764a6f1d73f4340d570804eacc4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fd9dd764a6f1d73f4340d570804eacc4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fd9dd764a6f1d73f4340d570804eacc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fd9dd764a6f1d73f4340d570804eacc4-Reviews.html", "metareview": "", "pdf_size": 2903961, "gs_citation": 295, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12674376607068364330&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fd9dd764a6f1d73f4340d570804eacc4-Abstract.html" }, { "title": "Forecasting Treatment Responses Over Time Using Recurrent Marginal Structural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11720", "id": "11720", "author_site": "Bryan Lim, Ahmed M. Alaa, Mihaela van der Schaar", "author": "Bryan Lim", "abstract": "Electronic health records provide a rich source of data for machine learning methods to learn dynamic treatment responses over time. However, any direct estimation is hampered by the presence of time-dependent confounding, where actions taken are dependent on time-varying variables related to the outcome of interest. Drawing inspiration from marginal structural models, a class of methods in epidemiology which use propensity weighting to adjust for time-dependent confounders, we introduce the Recurrent Marginal Structural Network - a sequence-to-sequence architecture for forecasting a patient's expected response to a series of planned treatments. Using simulations of a state-of-the-art pharmacokinetic-pharmacodynamic (PK-PD) model of tumor growth, we demonstrate the ability of our network to accurately learn unbiased treatment responses from observational data \u2013 even under changes in the policy of treatment assignments \u2013 and performance gains over benchmarks.", "bibtex": "@inproceedings{NEURIPS2018_56e6a932,\n author = {Lim, Bryan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Forecasting Treatment Responses Over Time Using Recurrent Marginal Structural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/56e6a93212e4482d99c84a639d254b67-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/56e6a93212e4482d99c84a639d254b67-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/56e6a93212e4482d99c84a639d254b67-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/56e6a93212e4482d99c84a639d254b67-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/56e6a93212e4482d99c84a639d254b67-Reviews.html", "metareview": "", "pdf_size": 480950, "gs_citation": 190, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9312966518414628527&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/56e6a93212e4482d99c84a639d254b67-Abstract.html" }, { "title": "Foreground Clustering for Joint Segmentation and Localization in Videos and Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11183", "id": "11183", "author": "Abhishek Sharma", "abstract": "This paper presents a novel framework in which video/image segmentation and localization are cast into a single optimization problem that integrates information from low level appearance cues with that of high level localization cues in a very weakly supervised manner. The proposed framework leverages two representations at different levels, exploits the spatial relationship between bounding boxes and superpixels as linear constraints and simultaneously discriminates between foreground and background at bounding box and superpixel level. Different from previous approaches that mainly rely on discriminative clustering, we incorporate a foreground model that minimizes the histogram difference of an object across all image frames. Exploiting the geometric relation between the superpixels and bounding boxes enables the transfer of segmentation cues to improve localization output and vice-versa. Inclusion of the foreground model generalizes our discriminative framework to video data where the background tends to be similar and thus, not discriminative. We demonstrate the effectiveness of our unified framework on the YouTube Object video dataset, Internet Object Discovery dataset and Pascal VOC 2007.", "bibtex": "@inproceedings{NEURIPS2018_3b3dbaf6,\n author = {Sharma, Abhishek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Foreground Clustering for Joint Segmentation and Localization in Videos and Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3b3dbaf68507998acd6a5a5254ab2d76-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3b3dbaf68507998acd6a5a5254ab2d76-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3b3dbaf68507998acd6a5a5254ab2d76-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3b3dbaf68507998acd6a5a5254ab2d76-Reviews.html", "metareview": "", "pdf_size": 356822, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17831279481518881516&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Navinfo Europe Research, Eindhoven, NL", "aff_domain": "gmail.com", "email": "gmail.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3b3dbaf68507998acd6a5a5254ab2d76-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Navinfo Europe Research", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "0", "aff_campus_unique": "Eindhoven", "aff_country_unique_index": "0", "aff_country_unique": "Netherlands" }, { "title": "Forward Modeling for Partial Observation Strategy Games - A StarCraft Defogger", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12015", "id": "12015", "author_site": "Gabriel Synnaeve, Zeming Lin, Jonas Gehring, Dan Gant, Vegard Mella, Vasil Khalidov, Nicolas Carion, Nicolas Usunier", "author": "Gabriel Synnaeve; Zeming Lin; Jonas Gehring; Dan Gant; Vegard Mella; Vasil Khalidov; Nicolas Carion; Nicolas Usunier", "abstract": "We formulate the problem of defogging as state estimation and future state prediction from previous, partial observations in the context of real-time strategy games. We propose to employ encoder-decoder neural networks for this task, and introduce proxy tasks and baselines for evaluation to assess their ability of capturing basic game rules and high-level dynamics. By combining convolutional neural networks and recurrent networks, we exploit spatial and sequential correlations and train well-performing models on a large dataset of human games of StarCraft: Brood War. Finally, we demonstrate the relevance of our models to downstream tasks by applying them for enemy unit prediction in a state-of-the-art, rule-based StarCraft bot. We observe improvements in win rates against several strong community bots.", "bibtex": "@inproceedings{NEURIPS2018_287e0413,\n author = {Synnaeve, Gabriel and Lin, Zeming and Gehring, Jonas and Gant, Dan and Mella, Vegard and Khalidov, Vasil and Carion, Nicolas and Usunier, Nicolas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Forward Modeling for Partial Observation Strategy Games - A StarCraft Defogger},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/287e041302f34b11ddfb57afc8048cd8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/287e041302f34b11ddfb57afc8048cd8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/287e041302f34b11ddfb57afc8048cd8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/287e041302f34b11ddfb57afc8048cd8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/287e041302f34b11ddfb57afc8048cd8-Reviews.html", "metareview": "", "pdf_size": 318474, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5562179615762953081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Facebook, NYC; Facebook, NYC; Facebook, Paris; Facebook, NYC; Facebook, Paris; Facebook, Paris; Facebook, Paris; Facebook, Paris", "aff_domain": "fb.com;fb.com;fb.com;fb.com;fb.com;fb.com;fb.com;fb.com", "email": "fb.com;fb.com;fb.com;fb.com;fb.com;fb.com;fb.com;fb.com", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/287e041302f34b11ddfb57afc8048cd8-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook", "aff_unique_url": "https://www.facebook.com", "aff_unique_abbr": "FB", "aff_campus_unique_index": "0;0;1;0;1;1;1;1", "aff_campus_unique": "New York City;Paris", "aff_country_unique_index": "0;0;1;0;1;1;1;1", "aff_country_unique": "United States;France" }, { "title": "Found Graph Data and Planted Vertex Covers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11152", "id": "11152", "author_site": "Austin Benson, Jon Kleinberg", "author": "Austin R Benson; Jon Kleinberg", "abstract": "A typical way in which network data is recorded is to measure all interactions involving a specified set of core nodes, which produces a graph containing this core together with a potentially larger set of fringe nodes that link to the core. Interactions between nodes in the fringe, however, are not present in the resulting graph data. For example, a phone service provider may only record calls in which at least one of the participants is a customer; this can include calls between a customer and a non-customer, but not between pairs of non-customers. Knowledge of which nodes belong to the core is crucial for interpreting the dataset, but this metadata is unavailable in many cases, either because it has been lost due to difficulties in data provenance, or because the network consists of \"found data\" obtained in settings such as counter-surveillance. This leads to an algorithmic problem of recovering the core set. Since the core is a vertex cover, we essentially have a planted vertex cover problem, but with an arbitrary underlying graph. We develop a framework for analyzing this planted vertex cover problem, based on the theory of fixed-parameter tractability, together with algorithms for recovering the core. Our algorithms are fast, simple to implement, and out-perform several baselines based on core-periphery structure on various real-world datasets.", "bibtex": "@inproceedings{NEURIPS2018_afd48367,\n author = {Benson, Austin R and Kleinberg, Jon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Found Graph Data and Planted Vertex Covers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/afd4836712c5e77550897e25711e1d96-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/afd4836712c5e77550897e25711e1d96-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/afd4836712c5e77550897e25711e1d96-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/afd4836712c5e77550897e25711e1d96-Reviews.html", "metareview": "", "pdf_size": 1957310, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3952614015987874962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Cornell University; Cornell University", "aff_domain": "cs.cornell.edu;cs.cornell.edu", "email": "cs.cornell.edu;cs.cornell.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/afd4836712c5e77550897e25711e1d96-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Frequency-Domain Dynamic Pruning for Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11124", "id": "11124", "author_site": "Zhenhua Liu, Jizheng Xu, Xiulian Peng, Ruiqin Xiong", "author": "Zhenhua Liu; Jizheng Xu; Xiulian Peng; Ruiqin Xiong", "abstract": "Deep convolutional neural networks have demonstrated their power in a variety of applications. However, the storage and computational requirements have largely restricted their deployment on mobile devices. Recently, pruning of unimportant parameters has been used for both network compression and acceleration. Considering that there is spatial redundancy within most filters in a CNN, we propose a frequency-domain dynamic pruning scheme to exploit the spatial correlations. The frequency-domain coefficients are pruned dynamically in each iteration and different frequency bands are pruned discriminatively, given their different importance to accuracy. Experimental results demonstrate that the proposed scheme can outperform previous spatial-domain counterparts by a large margin. Specifically, it can achieve a compression ratio of 8.4x and a theoretical inference speed-up of 9.2x for ResNet-110, while the accuracy is even better than the reference model on CIFAR-10.", "bibtex": "@inproceedings{NEURIPS2018_a9a6653e,\n author = {Liu, Zhenhua and Xu, Jizheng and Peng, Xiulian and Xiong, Ruiqin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Frequency-Domain Dynamic Pruning for Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a9a6653e48976138166de32772b1bf40-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a9a6653e48976138166de32772b1bf40-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a9a6653e48976138166de32772b1bf40-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a9a6653e48976138166de32772b1bf40-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a9a6653e48976138166de32772b1bf40-Reviews.html", "metareview": "", "pdf_size": 1011375, "gs_citation": 200, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9152239531877131210&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University; Microsoft Research Asia; Microsoft Research Asia; Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn", "email": "pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a9a6653e48976138166de32772b1bf40-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": "School of Electronic Engineering and Computer Science;Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "PKU;MSR Asia", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "From Stochastic Planning to Marginal MAP", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11313", "id": "11313", "author_site": "Hao(Jackson) Cui, Radu Marinescu, Roni Khardon", "author": "Hao(Jackson) Cui; Radu Marinescu; Roni Khardon", "abstract": "It is well known that the problems of stochastic planning and probabilistic inference are closely related. This paper makes two contributions in this context. The first is to provide an analysis of the recently developed SOGBOFA heuristic planning algorithm that was shown to be effective for problems with large factored state and action spaces. It is shown that SOGBOFA can be seen as a specialized inference algorithm that computes its solutions through a combination of a symbolic variant of belief propagation and gradient ascent. The second contribution is a new solver for Marginal MAP (MMAP) inference. We introduce a new reduction from MMAP to maximum expected utility problems which are suitable for the symbolic computation in SOGBOFA. This yields a novel algebraic gradient-based solver (AGS) for MMAP. An experimental evaluation illustrates the potential of AGS in solving difficult MMAP problems.", "bibtex": "@inproceedings{NEURIPS2018_5129a5dd,\n author = {Cui, Hao(Jackson) and Marinescu, Radu and Khardon, Roni},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {From Stochastic Planning to Marginal MAP},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5129a5ddcd0dcd755232baa04c231698-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5129a5ddcd0dcd755232baa04c231698-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5129a5ddcd0dcd755232baa04c231698-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5129a5ddcd0dcd755232baa04c231698-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5129a5ddcd0dcd755232baa04c231698-Reviews.html", "metareview": "", "pdf_size": 746698, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17975168712482092416&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Tufts University; IBM Research; Department of Computer Science, Indiana University", "aff_domain": "tufts.edu;ie.ibm.com;iu.edu", "email": "tufts.edu;ie.ibm.com;iu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5129a5ddcd0dcd755232baa04c231698-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Tufts University;IBM;Indiana University", "aff_unique_dep": "Department of Computer Science;IBM Research;Department of Computer Science", "aff_unique_url": "https://www.tufts.edu;https://www.ibm.com/research;https://www.indiana.edu", "aff_unique_abbr": "Tufts;IBM;IU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fully Neural Network Based Speech Recognition on Mobile and Embedded Devices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12004", "id": "12004", "author_site": "Jinhwan Park, Yoonho Boo, Iksoo Choi, Sungho Shin, Wonyong Sung", "author": "Jinhwan Park; Yoonho Boo; Iksoo Choi; Sungho Shin; Wonyong Sung", "abstract": "Real-time automatic speech recognition (ASR) on mobile and embedded devices has been of great interest for many years. We present real-time speech recognition on smartphones or embedded systems by employing recurrent neural network (RNN) based acoustic models, RNN based language models, and beam-search decoding. The acoustic model is end-to-end trained with connectionist temporal classification (CTC) loss. The RNN implementation on embedded devices can suffer from excessive DRAM accesses because the parameter size of a neural network usually exceeds that of the cache memory and the parameters are used only once for each time step. To remedy this problem, we employ a multi-time step parallelization approach that computes multiple output samples at a time with the parameters fetched from the DRAM. Since the number of DRAM accesses can be reduced in proportion to the number of parallelization steps, we can achieve a high processing speed. However, conventional RNNs, such as long short-term memory (LSTM) or gated recurrent unit (GRU), do not permit multi-time step parallelization. We construct an acoustic model by combining simple recurrent units (SRUs) and depth-wise 1-dimensional convolution layers for multi-time step parallelization. Both the character and word piece models are developed for acoustic modeling, and the corresponding RNN based language models are used for beam search decoding. 
We achieve a competitive WER on the WSJ corpus with a total model size of around 15MB, and achieve real-time speed using only a single ARM core without a GPU or special hardware.", "bibtex": "@inproceedings{NEURIPS2018_42299f06,\n author = {Park, Jinhwan and Boo, Yoonho and Choi, Iksoo and Shin, Sungho and Sung, Wonyong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fully Neural Network Based Speech Recognition on Mobile and Embedded Devices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/42299f06ee419aa5d9d07798b56779e2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/42299f06ee419aa5d9d07798b56779e2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/42299f06ee419aa5d9d07798b56779e2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/42299f06ee419aa5d9d07798b56779e2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/42299f06ee419aa5d9d07798b56779e2-Reviews.html", "metareview": "", "pdf_size": 317219, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18120673904247713138&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff": "Seoul National University; Seoul National University; Seoul National University; Seoul National University; Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/42299f06ee419aa5d9d07798b56779e2-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Fully Understanding The Hashing Trick", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11526", "id": "11526", "author_site": "Lior Kamma, Casper B. Freksen, Kasper Green Larsen", "author": "Casper B. Freksen; Lior Kamma; Kasper Green Larsen", "abstract": "Feature hashing, also known as {\\em the hashing trick}, introduced by Weinberger et al. (2009), is one of the key techniques used in scaling up machine learning algorithms. Loosely speaking, feature hashing uses a random sparse projection matrix $A : \\mathbb{R}^n \\to \\mathbb{R}^m$ (where $m \\ll n$) in order to reduce the dimension of the data from $n$ to $m$ while approximately preserving the Euclidean norm. Every column of $A$ contains exactly one non-zero entry, equal to either $-1$ or $1$.\n\nWeinberger et al. showed tail bounds on $\\|Ax\\|_2^2$. Specifically, they showed that for every $\\varepsilon, \\delta$, if $\\|x\\|_{\\infty} / \\|x\\|_2$ is sufficiently small, and $m$ is sufficiently large, then \n\\begin{equation*}\\Pr[ \\; | \\;\\|Ax\\|_2^2 - \\|x\\|_2^2\\; | < \\varepsilon \\|x\\|_2^2 \\;] \\ge 1 - \\delta \\;.\\end{equation*}\nThese bounds were later extended by Dasgupta et al. (2010) and most recently refined by Dahlgaard et al. 
(2017); however, the true nature of the performance of this key technique, and specifically the correct tradeoff between the pivotal parameters $\\|x\\|_{\\infty} / \\|x\\|_2, m, \\varepsilon, \\delta$ remained an open question.\n\nWe settle this question by giving tight asymptotic bounds on the exact tradeoff between the central parameters, thus providing a complete understanding of the performance of feature hashing. We complement the asymptotic bound with empirical data, which shows that the constants \"hiding\" in the asymptotic notation are, in fact, very close to $1$, thus further illustrating the tightness of the presented bounds in practice.", "bibtex": "@inproceedings{NEURIPS2018_7e837225,\n author = {Freksen, Casper B. and Kamma, Lior and Green Larsen, Kasper},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fully Understanding The Hashing Trick},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7e83722522e8aeb7512b7075311316b7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7e83722522e8aeb7512b7075311316b7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7e83722522e8aeb7512b7075311316b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7e83722522e8aeb7512b7075311316b7-Reviews.html", "metareview": "", "pdf_size": 756418, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6818720745951337577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science, Aarhus University, Denmark; Department of Computer Science, Aarhus University, Denmark; Department of Computer Science, Aarhus University, Denmark", "aff_domain": "cs.au.dk;cs.au.dk;cs.au.dk", "email": "cs.au.dk;cs.au.dk;cs.au.dk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7e83722522e8aeb7512b7075311316b7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Aarhus University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://au.dk", "aff_unique_abbr": "AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "GIANT: Globally Improved Approximate Newton Method for Distributed Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11243", "id": "11243", "author_site": "Shusen Wang, Fred Roosta, Peng Xu, Michael Mahoney", "author": "Shusen Wang; Fred Roosta; Peng Xu; Michael W. Mahoney", "abstract": "For a distributed computing environment, we consider the empirical risk minimization problem and propose a distributed and communication-efficient Newton-type optimization method. At every iteration, each worker locally finds an Approximate NewTon (ANT) direction, which is sent to the main driver. The main driver then averages all the ANT directions received from workers to form a Globally Improved ANT (GIANT) direction. GIANT is highly communication efficient and naturally exploits the trade-offs between local computations and global communications in that more local computations result in fewer overall rounds of communications. 
Theoretically, we show that GIANT enjoys an improved convergence rate as compared with first-order methods and existing distributed Newton-type methods. Further, and in sharp contrast with many existing distributed Newton-type methods, as well as popular first-order methods, a highly advantageous practical feature of GIANT is that it only involves one tuning parameter. We conduct large-scale experiments on a computer cluster and, empirically, demonstrate the superior performance of GIANT.", "bibtex": "@inproceedings{NEURIPS2018_dabd8d2c,\n author = {Wang, Shusen and Roosta, Fred and Xu, Peng and Mahoney, Michael W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GIANT: Globally Improved Approximate Newton Method for Distributed Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dabd8d2ce74e782c65a973ef76fd540b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dabd8d2ce74e782c65a973ef76fd540b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dabd8d2ce74e782c65a973ef76fd540b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dabd8d2ce74e782c65a973ef76fd540b-Reviews.html", "metareview": "", "pdf_size": 610557, "gs_citation": 169, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13081774885859868667&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "aff": "Stevens Institute of Technology; University of Queensland; Stanford University; University of California at Berkeley", "aff_domain": "stevens.edu;uq.edu.au;stanford.edu;stat.berkeley.edu", "email": "stevens.edu;uq.edu.au;stanford.edu;stat.berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dabd8d2ce74e782c65a973ef76fd540b-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Stevens Institute of Technology;University of Queensland;Stanford University;University of California, Berkeley", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stevens.edu;https://www.uq.edu.au;https://www.stanford.edu;https://www.berkeley.edu", "aff_unique_abbr": "SIT;UQ;Stanford;UC Berkeley", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Stanford;Berkeley", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Australia" }, { "title": "GILBO: One Metric to Measure Them All", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11678", "id": "11678", "author_site": "Alexander Alemi, Ian Fischer", "author": "Alexander A Alemi; Ian Fischer", "abstract": "We propose a simple, tractable lower bound on the mutual information contained in the joint generative density of any latent variable generative model: the GILBO (Generative Information Lower BOund). It offers a data-independent measure of the complexity of the learned latent variable description, giving the log of the effective description length. It is well-defined for both VAEs and GANs. We compute the GILBO for 800 GANs and VAEs each trained on four datasets (MNIST, FashionMNIST, CIFAR-10 and CelebA) and discuss the results.", "bibtex": "@inproceedings{NEURIPS2018_7535bbb9,\n author = {Alemi, Alexander A and Fischer, Ian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GILBO: One Metric to Measure Them All},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7535bbb91c8fde347ad861f293126633-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7535bbb91c8fde347ad861f293126633-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7535bbb91c8fde347ad861f293126633-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7535bbb91c8fde347ad861f293126633-Reviews.html", "metareview": "", "pdf_size": 9923895, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14349686696431672115&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Google AI; Google AI", "aff_domain": "google.com;google.com", "email": "google.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7535bbb91c8fde347ad861f293126633-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google AI", "aff_unique_url": "https://ai.google", "aff_unique_abbr": "Google AI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GLoMo: Unsupervised Learning of Transferable Relational Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11854", "id": "11854", "author_site": "Zhilin Yang, Jake Zhao, Bhuwan Dhingra, Kaiming He, William Cohen, Russ Salakhutdinov, Yann LeCun", "author": "Zhilin Yang; Jake Zhao; Bhuwan Dhingra; Kaiming He; William W. Cohen; Ruslan Salakhutdinov; Yann LeCun", "abstract": "Modern deep transfer learning approaches have mainly focused on learning generic feature vectors from one task that are transferable to other tasks, such as word embeddings in language and pretrained convolutional features in vision. However, these approaches usually transfer unary features and largely ignore more structured graphical representations. This work explores the possibility of learning generic latent relational graphs that capture dependencies between pairs of data units (e.g., words or pixels) from large-scale unlabeled data and transferring the graphs to downstream tasks. Our proposed transfer learning framework improves performance on various tasks including question answering, natural language inference, sentiment analysis, and image classification. We also show that the learned graphs are generic enough to be transferred to different embeddings on which the graphs have not been trained (including GloVe embeddings, ELMo embeddings, and task-specific RNN hidden units), or embedding-free units such as image pixels.", "bibtex": "@inproceedings{NEURIPS2018_5dbc8390,\n author = {Yang, Zhilin and Zhao, Jake and Dhingra, Bhuwan and He, Kaiming and Cohen, William W and Salakhutdinov, Russ R and LeCun, Yann},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GLoMo: Unsupervised Learning of Transferable Relational Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5dbc8390f17e019d300d5a162c3ce3bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5dbc8390f17e019d300d5a162c3ce3bc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5dbc8390f17e019d300d5a162c3ce3bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5dbc8390f17e019d300d5a162c3ce3bc-Reviews.html", "metareview": "", "pdf_size": 583867, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2917732707325476709&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Carnegie Mellon University; New York University+Facebook AI Research; Carnegie Mellon University; Facebook AI Research; Carnegie Mellon University; Carnegie Mellon University; New York University+Facebook AI Research", "aff_domain": "cs.cmu.edu;cs.nyu.com;cs.cmu.edu;fb.com;cs.cmu.edu;cs.cmu.edu;cs.nyu.com", "email": "cs.cmu.edu;cs.nyu.com;cs.cmu.edu;fb.com;cs.cmu.edu;cs.cmu.edu;cs.nyu.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5dbc8390f17e019d300d5a162c3ce3bc-Abstract.html", "aff_unique_index": "0;1+2;0;2;0;0;1+2", "aff_unique_norm": "Carnegie Mellon University;New York University;Meta", "aff_unique_dep": ";;Facebook AI Research", "aff_unique_url": "https://www.cmu.edu;https://www.nyu.edu;https://research.facebook.com", "aff_unique_abbr": "CMU;NYU;FAIR", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "GPyTorch: Blackbox Matrix-Matrix Gaussian Process Inference with GPU Acceleration", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11728", "id": "11728", "author_site": "Jacob Gardner, Geoff Pleiss, Kilian Weinberger, David Bindel, Andrew Wilson", "author": "Jacob Gardner; Geoff Pleiss; Kilian Q. Weinberger; David Bindel; Andrew G Wilson", "abstract": "Despite advances in scalable models, the inference tools used for Gaussian processes (GPs) have yet to fully capitalize on developments in computing hardware. We present an efficient and general approach to GP inference based on Blackbox Matrix-Matrix multiplication (BBMM). BBMM inference uses a modified batched version of the conjugate gradients algorithm to derive all terms for training and inference in a single call. BBMM reduces the asymptotic complexity of exact GP inference from O(n^3) to O(n^2). Adapting this algorithm to scalable approximations and complex GP models simply requires a routine for efficient matrix-matrix multiplication with the kernel and its derivative. In addition, BBMM uses a specialized preconditioner to substantially speed up convergence. In experiments we show that BBMM effectively uses GPU hardware to dramatically accelerate both exact GP inference and scalable approximations. Additionally, we provide GPyTorch, a software platform for scalable GP inference via BBMM, built on PyTorch.", "bibtex": "@inproceedings{NEURIPS2018_27e8e171,\n author = {Gardner, Jacob and Pleiss, Geoff and Weinberger, Kilian Q and Bindel, David and Wilson, Andrew G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GPyTorch: Blackbox Matrix-Matrix Gaussian Process Inference with GPU Acceleration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/27e8e17134dd7083b050476733207ea1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/27e8e17134dd7083b050476733207ea1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/27e8e17134dd7083b050476733207ea1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/27e8e17134dd7083b050476733207ea1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/27e8e17134dd7083b050476733207ea1-Reviews.html", "metareview": "", "pdf_size": 440662, "gs_citation": 1574, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15805506961047915622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Cornell University; Cornell University; Cornell University; Cornell University; Cornell University", "aff_domain": "cornell.edu;cs.cornell.edu;cs.cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cs.cornell.edu;cs.cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/27e8e17134dd7083b050476733207ea1-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gamma-Poisson Dynamic Matrix Factorization Embedded with Metadata Influence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11567", "id": "11567", "author_site": "Trong Dinh Thac Do, Longbing Cao", "author": "Trong Dinh Thac Do; Longbing Cao", "abstract": "A conjugate Gamma-Poisson model for Dynamic Matrix Factorization incorporated with metadata influence (mGDMF for short) is proposed to effectively and efficiently model massive, sparse and dynamic data in recommendations. Modeling recommendation problems with a massive number of ratings and very sparse or even no ratings on some users/items in a dynamic setting is very demanding and poses critical challenges to well-studied matrix factorization models due to the large-scale, sparse and dynamic nature of the data. Our proposed mGDMF tackles these challenges by introducing three strategies: (1) constructing a stable Gamma-Markov chain model that smoothly drifts over time by combining both static and dynamic latent features of data; (2) incorporating the user/item metadata into the model to tackle sparse ratings; and (3) undertaking stochastic variational inference to efficiently handle massive data. mGDMF is conjugate, dynamic and scalable. Experiments show that mGDMF significantly (both effectively and efficiently) outperforms the state-of-the-art static and dynamic models on large, sparse and dynamic data.", "bibtex": "@inproceedings{NEURIPS2018_4e2a6330,\n author = {Do, Trong Dinh Thac and Cao, Longbing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gamma-Poisson Dynamic Matrix Factorization Embedded with Metadata Influence},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e2a6330465c8ffcaa696a5a16639176-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e2a6330465c8ffcaa696a5a16639176-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e2a6330465c8ffcaa696a5a16639176-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e2a6330465c8ffcaa696a5a16639176-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e2a6330465c8ffcaa696a5a16639176-Reviews.html", "metareview": "", "pdf_size": 736894, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10064468484143256645&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Advanced Analytics Institute, University of Technology Sydney; Advanced Analytics Institute, University of Technology Sydney", "aff_domain": "gmail.com;gmail.com", "email": "gmail.com;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e2a6330465c8ffcaa696a5a16639176-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "Advanced Analytics Institute", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Sydney", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "title": "Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11895", "id": "11895", "author_site": "Jie Hu, Li Shen, Samuel Albanie, Gang Sun, Andrea Vedaldi", "author": "Jie Hu; Li Shen; Samuel Albanie; Gang Sun; Andrea Vedaldi", "abstract": "While the use of bottom-up local operators in convolutional neural networks (CNNs) matches well some of the statistics of natural images, it may also prevent such models from capturing contextual long-range feature interactions. In this work, we propose a simple, lightweight approach for better context exploitation in CNNs. We do so by introducing a pair of operators: gather, which efficiently aggregates feature responses from a large spatial extent, and excite, which redistributes the pooled information to local features. The operators are cheap, both in terms of number of added parameters and computational complexity, and can be integrated directly in existing architectures to improve their performance. Experiments on several datasets show that gather-excite can bring benefits comparable to increasing the depth of a CNN at a fraction of the cost. For example, we find ResNet-50 with gather-excite operators is able to outperform its 101-layer counterpart on ImageNet with no additional learnable parameters. We also propose a parametric gather-excite operator pair which yields further performance gains, relate it to the recently-introduced Squeeze-and-Excitation Networks, and analyse the effects of these changes to the CNN feature activation statistics.", "bibtex": "@inproceedings{NEURIPS2018_dc363817,\n author = {Hu, Jie and Shen, Li and Albanie, Samuel and Sun, Gang and Vedaldi, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dc363817786ff182b7bc59565d864523-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dc363817786ff182b7bc59565d864523-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dc363817786ff182b7bc59565d864523-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dc363817786ff182b7bc59565d864523-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dc363817786ff182b7bc59565d864523-Reviews.html", "metareview": "", "pdf_size": 1583814, "gs_citation": 850, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9719951211536151216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Momenta; Visual Geometry Group, University of Oxford; Visual Geometry Group, University of Oxford; Momenta; Visual Geometry Group, University of Oxford", "aff_domain": "momenta.ai;robots.ox.ac.uk;robots.ox.ac.uk;momenta.ai;robots.ox.ac.uk", "email": "momenta.ai;robots.ox.ac.uk;robots.ox.ac.uk;momenta.ai;robots.ox.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dc363817786ff182b7bc59565d864523-Abstract.html", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Momenta;University of Oxford", "aff_unique_dep": ";Visual Geometry Group", "aff_unique_url": "https://www.momenta.cn;https://www.ox.ac.uk", "aff_unique_abbr": "Momenta;Oxford", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Gaussian Process Conditional Density Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11248", "id": "11248", "author_site": "Vincent Dutordoir, Hugh Salimbeni, James Hensman, Marc Deisenroth", "author": "Vincent Dutordoir; Hugh Salimbeni; James Hensman; Marc Deisenroth", "abstract": "Conditional Density Estimation (CDE) models deal with estimating conditional distributions. The conditions imposed on the distribution are the inputs of the model. CDE is a challenging task as there is a fundamental trade-off between model complexity, representational capacity and overfitting. In this work, we propose to extend the model's input with latent variables and use Gaussian processes (GP) to map this augmented input onto samples from the conditional distribution. Our Bayesian approach allows for the modeling of small datasets, but we also provide the machinery for it to be applied to big data using stochastic variational inference. Our approach can be used to model densities even in sparse data regions, and allows for sharing learned structure between conditions. We illustrate the effectiveness and wide-reaching applicability of our model on a variety of real-world problems, such as spatio-temporal density estimation of taxi drop-offs, non-Gaussian noise modeling, and few-shot learning on omniglot images.", "bibtex": "@inproceedings{NEURIPS2018_6a61d423,\n author = {Dutordoir, Vincent and Salimbeni, Hugh and Hensman, James and Deisenroth, Marc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gaussian Process Conditional Density Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a61d423d02a1c56250dc23ae7ff12f3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a61d423d02a1c56250dc23ae7ff12f3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6a61d423d02a1c56250dc23ae7ff12f3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a61d423d02a1c56250dc23ae7ff12f3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a61d423d02a1c56250dc23ae7ff12f3-Reviews.html", "metareview": "", "pdf_size": 587200, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16737267154273287151&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "PROWLER.io, Cambridge, UK+Imperial College London; PROWLER.io, Cambridge, UK+Imperial College London; PROWLER.io, Cambridge, UK+Imperial College London; PROWLER.io, Cambridge, UK+Imperial College London", "aff_domain": "prowler.io;prowler.io;prowler.io;prowler.io", "email": "prowler.io;prowler.io;prowler.io;prowler.io", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a61d423d02a1c56250dc23ae7ff12f3-Abstract.html", "aff_unique_index": "0+1;0+1;0+1;0+1", "aff_unique_norm": "PROWLER.io;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://prowler.io;https://www.imperial.ac.uk", "aff_unique_abbr": ";ICL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0+0;0+0;0+0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Gaussian Process Prior Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11981", "id": "11981", "author_site": "Francesco Paolo Casale, Adrian Dalca, Luca Saglietti, Jennifer Listgarten, Nicolo Fusi", "author": "Francesco Paolo Casale; Adrian Dalca; Luca Saglietti; Jennifer Listgarten; Nicolo Fusi", "abstract": "Variational autoencoders (VAE) are a powerful and widely-used class of models to learn complex data distributions in an unsupervised fashion. One important limitation of VAEs is the prior assumption that latent sample representations are independent and identically distributed. However, for many important datasets, such as time-series of images, this assumption is too strong: accounting for covariances between samples, such as those in time, can yield a more appropriate model specification and improve performance in downstream tasks. In this work, we introduce a new model, the Gaussian Process (GP) Prior Variational Autoencoder (GPPVAE), to specifically address this issue. The GPPVAE aims to combine the power of VAEs with the ability to model correlations afforded by GP priors. To achieve efficient inference in this new class of models, we leverage structure in the covariance matrix, and introduce a new stochastic backpropagation strategy that allows for computing stochastic gradients in a distributed and low-memory fashion. 
We show that our method outperforms conditional VAEs (CVAEs) and an adaptation of standard VAEs in two image data applications.", "bibtex": "@inproceedings{NEURIPS2018_1c336b80,\n author = {Casale, Francesco Paolo and Dalca, Adrian and Saglietti, Luca and Listgarten, Jennifer and Fusi, Nicolo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gaussian Process Prior Variational Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1c336b8080f82bcc2cd2499b4c57261d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1c336b8080f82bcc2cd2499b4c57261d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1c336b8080f82bcc2cd2499b4c57261d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1c336b8080f82bcc2cd2499b4c57261d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1c336b8080f82bcc2cd2499b4c57261d-Reviews.html", "metareview": "", "pdf_size": 2792345, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7294538008539835502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Microsoft Research New England, Cambridge (MA), USA; Computer Science and Artificial Intelligence Lab, MIT, Cambridge (MA), USA+Martinos Center for Biomedical Imaging, MGH, HMS, Boston (MA), USA; Microsoft Research New England, Cambridge (MA), USA+Italian Institute for Genomic Medicine, Torino, Italy; EECS Department, University of California, Berkeley (CA), USA; Microsoft Research New England, Cambridge (MA), USA", "aff_domain": "microsoft.com; ; ; ; ", "email": "microsoft.com; ; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1c336b8080f82bcc2cd2499b4c57261d-Abstract.html", "aff_unique_index": "0;1+2;0+3;4;0", "aff_unique_norm": "Microsoft;Massachusetts Institute of Technology;MGH;Italian Institute for Genomic Medicine;University of California, Berkeley", "aff_unique_dep": "Microsoft Research New England;Computer Science and Artificial Intelligence Lab;Martinos Center for Biomedical Imaging;;EECS Department", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/new-england;https://web.mit.edu;https://www.mgh.harvard.edu;;https://www.berkeley.edu", "aff_unique_abbr": "MSR NE;MIT;MGH;;UC Berkeley", "aff_campus_unique_index": "0;0+1;0;3;0", "aff_campus_unique": "Cambridge;Boston;;Berkeley", "aff_country_unique_index": "0;0+0;0+1;0;0", "aff_country_unique": "United States;Italy" }, { "title": "Gen-Oja: Simple & Efficient Algorithm for Streaming Generalized Eigenvector Computation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11676", "id": "11676", "author_site": "Kush Bhatia, Aldo Pacchiano, Nicolas Flammarion, Peter Bartlett, Michael Jordan", "author": "Kush Bhatia; Aldo Pacchiano; Nicolas Flammarion; Peter L Bartlett; Michael I Jordan", "abstract": "In this paper, we study the problems of principal Generalized Eigenvector computation and Canonical Correlation Analysis in the stochastic setting. We propose a simple and efficient algorithm for these problems. 
We prove the global convergence of our algorithm, borrowing ideas from the theory of fast-mixing Markov chains and two-time-scale stochastic approximation, showing that it achieves the optimal rate of convergence. In the process, we develop tools for understanding stochastic processes with Markovian noise, which might be of independent interest.", "bibtex": "@inproceedings{NEURIPS2018_1b318124,\n author = {Bhatia, Kush and Pacchiano, Aldo and Flammarion, Nicolas and Bartlett, Peter L and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gen-Oja: Simple \\& Efficient Algorithm for Streaming Generalized Eigenvector Computation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1b318124e37af6d74a03501474f44ea1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1b318124e37af6d74a03501474f44ea1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1b318124e37af6d74a03501474f44ea1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1b318124e37af6d74a03501474f44ea1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1b318124e37af6d74a03501474f44ea1-Reviews.html", "metareview": "", "pdf_size": 390349, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1226941607056494205&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;cs.berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1b318124e37af6d74a03501474f44ea1-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generalisation in humans and deep neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11725", "id": "11725", "author_site": "Robert Geirhos, Carlos R. M. Temme, Jonas Rauber, Heiko H. Sch\u00fctt, Matthias Bethge, Felix A. Wichmann", "author": "Robert Geirhos; Carlos R. M. Temme; Jonas Rauber; Heiko H. Sch\u00fctt; Matthias Bethge; Felix A. Wichmann", "abstract": "We compare the robustness of humans and current convolutional deep neural networks (DNNs) on object recognition under twelve different types of image degradations. First, using three well-known DNNs (ResNet-152, VGG-19, GoogLeNet), we find the human visual system to be more robust to nearly all of the tested image manipulations, and we observe progressively diverging classification error-patterns between humans and DNNs when the signal gets weaker. 
Secondly, we show that DNNs trained directly on distorted images consistently surpass human performance on the exact distortion types they were trained on, yet they display extremely poor generalisation abilities when tested on other distortion types. For example, training on salt-and-pepper noise does not imply robustness to uniform white noise and vice versa. Thus, changes in the noise distribution between training and testing constitute a crucial challenge to deep learning vision systems that can be systematically addressed in a lifelong machine learning approach. Our new dataset, consisting of 83K carefully measured human psychophysical trials, provides a useful reference for lifelong robustness against image degradations set by the human visual system.", "bibtex": "@inproceedings{NEURIPS2018_0937fb58,\n author = {Geirhos, Robert and Temme, Carlos R. M. and Rauber, Jonas and Sch\\\"{u}tt, Heiko H. and Bethge, Matthias and Wichmann, Felix A.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalisation in humans and deep neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0937fb5864ed06ffb59ae5f9b5ed67a9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0937fb5864ed06ffb59ae5f9b5ed67a9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0937fb5864ed06ffb59ae5f9b5ed67a9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0937fb5864ed06ffb59ae5f9b5ed67a9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0937fb5864ed06ffb59ae5f9b5ed67a9-Reviews.html", "metareview": "", "pdf_size": 2195109, "gs_citation": 829, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16577111803298526010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Neural Information Processing Group, University of T\u00fcbingen + Centre for Integrative Neuroscience, University of T\u00fcbingen + International Max Planck Research School for Intelligent Systems; Neural Information Processing Group, University of T\u00fcbingen + Centre for Integrative Neuroscience, University of T\u00fcbingen + International Max Planck Research School for Intelligent Systems; Centre for Integrative Neuroscience, University of T\u00fcbingen + International Max Planck Research School for Intelligent Systems; Neural Information Processing Group, University of T\u00fcbingen + Graduate School of Neural and Behavioural Sciences, University of T\u00fcbingen + Department of Psychology, University of Potsdam; Centre for Integrative Neuroscience, University of T\u00fcbingen + Bernstein Center for Computational Neuroscience T\u00fcbingen + Max Planck Institute for Biological Cybernetics; Neural Information Processing Group, University of T\u00fcbingen + Centre for Integrative Neuroscience, University of T\u00fcbingen + Bernstein Center for Computational Neuroscience T\u00fcbingen + Max Planck Institute for Intelligent Systems", "aff_domain": "bethgelab.org; ; ; ; ; ", "email": "bethgelab.org; ; ; ; ; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0937fb5864ed06ffb59ae5f9b5ed67a9-Abstract.html", "aff_unique_index": "0+0+1;0+0+1;0+1;0+0+2;0+3+4;0+0+3+1", "aff_unique_norm": "University of T\u00fcbingen;Max Planck 
Institute for Intelligent Systems;University of Potsdam;Bernstein Center for Computational Neuroscience;Max Planck Institute for Biological Cybernetics", "aff_unique_dep": "Neural Information Processing Group;Intelligent Systems;Department of Psychology;Computational Neuroscience;Biological Cybernetics", "aff_unique_url": "https://www.uni-tuebingen.de;https://www.mpitue.mpg.de;https://www.uni-potsdam.de;https://www.bccn-tuebingen.de;https://www.biocybernetics.mpg.de", "aff_unique_abbr": ";MPI-IS;UP;BCCN;MPIBC", "aff_campus_unique_index": ";;;;1;1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0+0+0;0+0+0;0+0;0+0+0;0+0+0;0+0+0+0", "aff_country_unique": "Germany" }, { "title": "Generalisation of structural knowledge in the hippocampal-entorhinal system", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11811", "id": "11811", "author_site": "James Whittington, Timothy Muller, Shirely Mark, Caswell Barry, Tim Behrens", "author": "James Whittington; Timothy Muller; Shirely Mark; Caswell Barry; Tim Behrens", "abstract": "A central problem in understanding intelligence is the concept of generalisation. This allows previously learnt structure to be exploited to solve tasks in novel situations differing in their particularities. We take inspiration from neuroscience, specifically the hippocampal-entorhinal system known to be important for generalisation. We propose that to generalise structural knowledge, the representations of the structure of the world, i.e. how entities in the world relate to each other, need to be separated from representations of the entities themselves. We show that, under these principles, artificial neural networks embedded with hierarchy and fast Hebbian memory can learn the statistics of memories and generalise structural knowledge. Spatial neuronal representations mirroring those found in the brain emerge, suggesting spatial cognition is an instance of more general organising principles. We further unify many entorhinal cell types as basis functions for constructing transition graphs, and show these representations effectively utilise memories. We experimentally support model assumptions, showing a preserved relationship between entorhinal grid and hippocampal place cells across environments.", "bibtex": "@inproceedings{NEURIPS2018_99064ba6,\n author = {Whittington, James and Muller, Timothy and Mark, Shirely and Barry, Caswell and Behrens, Tim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalisation of structural knowledge in the hippocampal-entorhinal system},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/99064ba6631e279d4a74622df99657d6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/99064ba6631e279d4a74622df99657d6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/99064ba6631e279d4a74622df99657d6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/99064ba6631e279d4a74622df99657d6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/99064ba6631e279d4a74622df99657d6-Reviews.html", "metareview": "", "pdf_size": 2064915, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11437235848327420569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Oxford, UK; University of Oxford, UK; University College London, UK; University College London, UK; University of Oxford, UK", "aff_domain": "magd.ox.ac.uk;gmail.com;ucl.ac.uk;ucl.ac.uk;fmrib.ox.ac.uk", "email": "magd.ox.ac.uk;gmail.com;ucl.ac.uk;ucl.ac.uk;fmrib.ox.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/99064ba6631e279d4a74622df99657d6-Abstract.html", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Oxford;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.ucl.ac.uk", "aff_unique_abbr": "Oxford;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Generalization Bounds for Uniformly Stable Algorithms", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11926", "id": "11926", "author_site": "Vitaly Feldman, Jan Vondrak", "author": "Vitaly Feldman; Jan Vondrak", "abstract": "Uniform stability of a learning algorithm is a classical notion of algorithmic stability introduced to derive high-probability bounds on the generalization error (Bousquet and Elisseeff, 2002). Specifically, for a loss function with range bounded in $[0,1]$, the generalization error of $\\gamma$-uniformly stable learning algorithm on $n$ samples is known to be at most $O((\\gamma +1/n) \\sqrt{n \\log(1/\\delta)})$ with probability at least $1-\\delta$. Unfortunately, this bound does not lead to meaningful generalization bounds in many common settings where $\\gamma \\geq 1/\\sqrt{n}$. At the same time the bound is known to be tight only when $\\gamma = O(1/n)$.\n Here we prove substantially stronger generalization bounds for uniformly stable algorithms without any additional assumptions. First, we show that the generalization error in this setting is at most $O(\\sqrt{(\\gamma + 1/n) \\log(1/\\delta)})$ with probability at least $1-\\delta$. In addition, we prove a tight bound of $O(\\gamma^2 + 1/n)$ on the second moment of the generalization error. The best previous bound on the second moment of the generalization error is $O(\\gamma + 1/n)$. Our proofs are based on new analysis techniques and our results imply substantially stronger generalization guarantees for several well-studied algorithms.", "bibtex": "@inproceedings{NEURIPS2018_05a62416,\n author = {Feldman, Vitaly and Vondrak, Jan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalization Bounds for Uniformly Stable Algorithms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/05a624166c8eb8273b8464e8d9cb5bd9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/05a624166c8eb8273b8464e8d9cb5bd9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/05a624166c8eb8273b8464e8d9cb5bd9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/05a624166c8eb8273b8464e8d9cb5bd9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/05a624166c8eb8273b8464e8d9cb5bd9-Reviews.html", "metareview": "", "pdf_size": 312390, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8353771892761221477&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Google Brain; Stanford University", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/05a624166c8eb8273b8464e8d9cb5bd9-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google Brain;", "aff_unique_url": "https://brain.google.com;https://www.stanford.edu", "aff_unique_abbr": "Google Brain;Stanford", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11838", "id": "11838", "author_site": "Zhilu Zhang, Mert Sabuncu", "author": "Zhilu Zhang; Mert Sabuncu", "abstract": "Deep neural networks (DNNs) have achieved tremendous success in a variety of applications across many disciplines. Yet, their superior performance comes with the expensive cost of requiring correctly annotated large-scale datasets. Moreover, due to DNNs' rich capacity, errors in training labels can hamper performance. To combat this problem, mean absolute error (MAE) has recently been proposed as a noise-robust alternative to the commonly-used categorical cross entropy (CCE) loss. However, as we show in this paper, MAE can perform poorly with DNNs and large-scale datasets. Here, we present a theoretically grounded set of noise-robust loss functions that can be seen as a generalization of MAE and CCE. Proposed loss functions can be readily applied with any existing DNN architecture and algorithm, while yielding good performance in a wide range of noisy label scenarios. We report results from experiments conducted with CIFAR-10, CIFAR-100 and FASHION-MNIST datasets and synthetically generated noisy labels.", "bibtex": "@inproceedings{NEURIPS2018_f2925f97,\n author = {Zhang, Zhilu and Sabuncu, Mert},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalized Cross Entropy Loss for Training Deep Neural Networks with Noisy Labels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f2925f97bc13ad2852a7a551802feea0-Reviews.html", "metareview": "", "pdf_size": 734639, "gs_citation": 3643, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4123558858259122185&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Electrical and Computer Engineering + Meinig School of Biomedical Engineering; Electrical and Computer Engineering + Meinig School of Biomedical Engineering", "aff_domain": "cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f2925f97bc13ad2852a7a551802feea0-Abstract.html", "aff_unique_index": "1;1", "aff_unique_norm": ";Cornell University", "aff_unique_dep": "Electrical and Computer Engineering;Meinig School of Biomedical Engineering", "aff_unique_url": ";https://www.bme.cornell.edu", "aff_unique_abbr": ";Cornell BME", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "Generalized Inverse Optimization through Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11036", "id": "11036", "author_site": "Chaosheng Dong, Yiran Chen, Bo Zeng", "author": "Chaosheng Dong; Yiran Chen; Bo Zeng", "abstract": "Inverse optimization is a powerful paradigm for learning preferences and restrictions that explain the behavior of a decision maker, based on a set of external signals and the corresponding decision pairs. However, most inverse optimization algorithms are designed specifically for the batch setting, where all the data is available in advance. As a consequence, there has been little use of these methods in an online setting suitable for real-time applications. In this paper, we propose a general framework for inverse optimization through online learning. Specifically, we develop an online learning algorithm that uses an implicit update rule which can handle noisy data. Moreover, under additional regularity assumptions in terms of the data and the model, we prove that our algorithm converges at a rate of $\mathcal{O}(1/\sqrt{T})$ and is statistically consistent. In our experiments, we show that the online learning approach can learn the parameters with great accuracy, is very robust to noise, and achieves a dramatic improvement in computational efficiency over the batch learning approach.", "bibtex": "@inproceedings{NEURIPS2018_28dd2c79,\n author = {Dong, Chaosheng and Chen, Yiran and Zeng, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalized Inverse Optimization through Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/28dd2c7955ce926456240b2ff0100bde-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/28dd2c7955ce926456240b2ff0100bde-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/28dd2c7955ce926456240b2ff0100bde-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/28dd2c7955ce926456240b2ff0100bde-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/28dd2c7955ce926456240b2ff0100bde-Reviews.html", "metareview": "", "pdf_size": 2646952, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7620520290231892583&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Industrial Engineering, University of Pittsburgh; Department of Electrical and Computer Engineering, Duke University; Department of Industrial Engineering, University of Pittsburgh", "aff_domain": "pitt.edu;duke.edu;pitt.edu", "email": "pitt.edu;duke.edu;pitt.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/28dd2c7955ce926456240b2ff0100bde-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pittsburgh;Duke University", "aff_unique_dep": "Department of Industrial Engineering;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.pitt.edu;https://www.duke.edu", "aff_unique_abbr": "Pitt;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Generalized Zero-Shot Learning with Deep Calibration Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11213", "id": "11213", "author_site": "Shichen Liu, Mingsheng Long, Jianmin Wang, Michael Jordan", "author": "Shichen Liu; Mingsheng Long; Jianmin Wang; Michael I Jordan", "abstract": "A technical challenge of deep learning is recognizing target classes without seen data. Zero-shot learning leverages semantic representations such as attributes or class prototypes to bridge source and target classes. Existing standard zero-shot learning methods may be prone to overfitting the seen data of source classes as they are blind to the semantic representations of target classes. In this paper, we study generalized zero-shot learning, which assumes access to target classes for unseen data during training, and prediction on unseen data is made by searching on both source and target classes. We propose a novel Deep Calibration Network (DCN) approach towards this generalized zero-shot learning paradigm, which enables simultaneous calibration of deep networks on the confidence of source classes and uncertainty of target classes. Our approach maps visual features of images and semantic representations of class prototypes to a common embedding space such that the compatibility of seen data to both source and target classes is maximized.
We show superior accuracy of our approach over the state of the art on benchmark datasets for generalized zero-shot learning, including AwA, CUB, SUN, and aPY.", "bibtex": "@inproceedings{NEURIPS2018_1587965f,\n author = {Liu, Shichen and Long, Mingsheng and Wang, Jianmin and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalized Zero-Shot Learning with Deep Calibration Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1587965fb4d4b5afe8428a4a024feb0d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1587965fb4d4b5afe8428a4a024feb0d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1587965fb4d4b5afe8428a4a024feb0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1587965fb4d4b5afe8428a4a024feb0d-Reviews.html", "metareview": "", "pdf_size": 1372807, "gs_citation": 301, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2312987665601213158&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Software, Tsinghua University, China + KLiss, MOE + BNRist + Research Center for Big Data, Tsinghua University, China; School of Software, Tsinghua University, China + KLiss, MOE + BNRist + Research Center for Big Data, Tsinghua University, China; School of Software, Tsinghua University, China + KLiss, MOE + BNRist + Research Center for Big Data, Tsinghua University, China; University of California, Berkeley, Berkeley, USA", "aff_domain": "gmail.com;tsinghua.edu.cn;tsinghua.edu.cn;berkeley.edu", "email": "gmail.com;tsinghua.edu.cn;tsinghua.edu.cn;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1587965fb4d4b5afe8428a4a024feb0d-Abstract.html", "aff_unique_index": "0+1+2+0;0+1+2+0;0+1+2+0;3", "aff_unique_norm": "Tsinghua University;Ministry of Education;BNRist;University of California, Berkeley", "aff_unique_dep": "School of Software;;;", "aff_unique_url": "https://www.tsinghua.edu.cn;;;https://www.berkeley.edu", "aff_unique_abbr": "THU;MOE;;UC Berkeley", "aff_campus_unique_index": ";;;1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0+1+0;0+1+0;0+1+0;3", "aff_country_unique": "China;Unknown;;United States" }, { "title": "Generalizing Graph Matching beyond Quadratic Assignment Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11107", "id": "11107", "author_site": "Tianshu Yu, Junchi Yan, Yilin Wang, Wei Liu, baoxin Li", "author": "Tianshu Yu; Junchi Yan; Yilin Wang; Wei Liu; baoxin Li", "abstract": "Graph matching has received persistent attention over decades, which can be formulated as a quadratic assignment problem (QAP). We show that a large family of functions, which we define as Separable Functions, can approximate discrete graph matching in the continuous domain asymptotically by varying the approximation controlling parameters. We also study the properties of global optimality and devise convex/concave-preserving extensions to the widely used Lawler's QAP form. Our theoretical findings show the potential for deriving new algorithms and techniques for graph matching. 
We deliver solvers based on two specific instances of Separable Functions, and the state-of-the-art performance of our method is verified on popular benchmarks.", "bibtex": "@inproceedings{NEURIPS2018_51d92be1,\n author = {Yu, Tianshu and Yan, Junchi and Wang, Yilin and Liu, Wei and Li, baoxin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalizing Graph Matching beyond Quadratic Assignment Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/51d92be1c60d1db1d2e5e7a07da55b26-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/51d92be1c60d1db1d2e5e7a07da55b26-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/51d92be1c60d1db1d2e5e7a07da55b26-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/51d92be1c60d1db1d2e5e7a07da55b26-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/51d92be1c60d1db1d2e5e7a07da55b26-Reviews.html", "metareview": "", "pdf_size": 4586575, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7341831149208532227&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Arizona State University; Shanghai Jiao Tong University; Arizona State University + Adobe; Tencent AI Lab; Arizona State University", "aff_domain": "asu.edu;sjtu.edu.cn;adobe.com;columbia.edu;asu.edu", "email": "asu.edu;sjtu.edu.cn;adobe.com;columbia.edu;asu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/51d92be1c60d1db1d2e5e7a07da55b26-Abstract.html", "aff_unique_index": "0;1;0+2;3;0", "aff_unique_norm": "Arizona State University;Shanghai Jiao Tong University;Adobe;Tencent", "aff_unique_dep": ";;Adobe Inc.;Tencent AI Lab", "aff_unique_url": "https://www.asu.edu;https://www.sjtu.edu.cn;https://www.adobe.com;https://ai.tencent.com", "aff_unique_abbr": "ASU;SJTU;Adobe;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0+0;1;0", "aff_country_unique": "United States;China" }, { "title": "Generalizing Point Embeddings using the Wasserstein Space of Elliptical Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11969", "id": "11969", "author_site": "Boris Muzellec, Marco Cuturi", "author": "Boris Muzellec; Marco Cuturi", "abstract": "Embedding complex objects as vectors in low dimensional spaces is a longstanding problem in machine learning. We propose in this work an extension of that approach, which consists in embedding objects as elliptical probability distributions, namely distributions whose densities have elliptical level sets. We endow these measures with the 2-Wasserstein metric, with two important benefits: (i) For such measures, the squared 2-Wasserstein metric has a closed form, equal to a weighted sum of the squared Euclidean distance between means and the squared Bures metric between covariance matrices. The latter is a Riemannian metric between positive semi-definite matrices, which turns out to be Euclidean on a suitable factor representation of such matrices, which is valid on the entire geodesic between these matrices.
(ii) The 2-Wasserstein distance boils down to the usual Euclidean metric when comparing Diracs, and therefore provides a natural framework to extend point embeddings. We show that for these reasons Wasserstein elliptical embeddings are more intuitive and yield tools that are better behaved numerically than the alternative choice of Gaussian embeddings with the Kullback-Leibler divergence. In particular, and unlike previous work based on the KL geometry, we learn elliptical distributions that are not necessarily diagonal. We demonstrate the advantages of elliptical embeddings by using them for visualization, to compute embeddings of words, and to reflect entailment or hypernymy.", "bibtex": "@inproceedings{NEURIPS2018_b613e70f,\n author = {Muzellec, Boris and Cuturi, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalizing Point Embeddings using the Wasserstein Space of Elliptical Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b613e70fd9f59310cf0a8d33de3f2800-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b613e70fd9f59310cf0a8d33de3f2800-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b613e70fd9f59310cf0a8d33de3f2800-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b613e70fd9f59310cf0a8d33de3f2800-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b613e70fd9f59310cf0a8d33de3f2800-Reviews.html", "metareview": "", "pdf_size": 3521171, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3601826070675882278&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "CREST, ENSAE; Google Brain+CREST, ENSAE", "aff_domain": "ensae.fr;google.com", "email": "ensae.fr;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b613e70fd9f59310cf0a8d33de3f2800-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "CREST;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.crest.fr;https://brain.google.com", "aff_unique_abbr": "CREST;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1+0", "aff_country_unique": "France;United States" }, { "title": "Generalizing Tree Probability Estimation via Bayesian Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11160", "id": "11160", "author_site": "Cheng Zhang, Frederick A Matsen IV", "author": "Cheng Zhang; Frederick A Matsen IV", "abstract": "Probability estimation is one of the fundamental tasks in statistics and machine learning. However, standard methods for probability estimation on discrete objects do not handle object structure in a satisfactory manner. In this paper, we derive a general Bayesian network formulation for probability estimation on leaf-labeled trees that enables flexible approximations which can generalize beyond observations. We show that efficient algorithms for learning Bayesian networks can be easily extended to probability estimation on this challenging structured space. 
Experiments on both synthetic and real data show that our methods greatly outperform the current practice of using the empirical distribution, as well as a previous effort for probability estimation on trees.", "bibtex": "@inproceedings{NEURIPS2018_b137fdd1,\n author = {Zhang, Cheng and Matsen IV, Frederick A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalizing Tree Probability Estimation via Bayesian Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b137fdd1f79d56c7edf3365fea7520f2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b137fdd1f79d56c7edf3365fea7520f2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b137fdd1f79d56c7edf3365fea7520f2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b137fdd1f79d56c7edf3365fea7520f2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b137fdd1f79d56c7edf3365fea7520f2-Reviews.html", "metareview": "", "pdf_size": 575088, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17096075908350325992&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Computational Biology Program, Fred Hutchinson Cancer Research Center, Seattle, WA 98109; Computational Biology Program, Fred Hutchinson Cancer Research Center, Seattle, WA 98109", "aff_domain": "fredhutch.org;fredhutch.org", "email": "fredhutch.org;fredhutch.org", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b137fdd1f79d56c7edf3365fea7520f2-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Fred Hutchinson Cancer Research Center", "aff_unique_dep": "Computational Biology Program", "aff_unique_url": "https://www.fredhutch.org", "aff_unique_abbr": "Fred Hutch", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generalizing to Unseen Domains via Adversarial Data Augmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11521", "id": "11521", "author_site": "Riccardo Volpi, Hongseok Namkoong, Ozan Sener, John Duchi, Vittorio Murino, Silvio Savarese", "author": "Riccardo Volpi; Hongseok Namkoong; Ozan Sener; John C. Duchi; Vittorio Murino; Silvio Savarese", "abstract": "We are concerned with learning models that generalize well to different unseen\ndomains. We consider a worst-case formulation over data distributions that are\nnear the source domain in the feature space. Only using training data from a single\nsource distribution, we propose an iterative procedure that augments the dataset\nwith examples from a fictitious target domain that is \"hard\" under the current model. We show that our iterative scheme is an adaptive data augmentation method where we append adversarial examples at each iteration. For softmax losses, we show that our method is a data-dependent regularization scheme that behaves differently from classical regularizers that regularize towards zero (e.g., ridge or lasso). 
On digit recognition and semantic segmentation tasks, our method learns models that improve performance across a range of a priori unknown target domains.", "bibtex": "@inproceedings{NEURIPS2018_1d94108e,\n author = {Volpi, Riccardo and Namkoong, Hongseok and Sener, Ozan and Duchi, John C and Murino, Vittorio and Savarese, Silvio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalizing to Unseen Domains via Adversarial Data Augmentation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1d94108e907bb8311d8802b48fd54b4a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1d94108e907bb8311d8802b48fd54b4a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1d94108e907bb8311d8802b48fd54b4a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1d94108e907bb8311d8802b48fd54b4a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1d94108e907bb8311d8802b48fd54b4a-Reviews.html", "metareview": "", "pdf_size": 646147, "gs_citation": 994, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3314749587084034699&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1d94108e907bb8311d8802b48fd54b4a-Abstract.html" }, { "title": "Generating Informative and Diverse Conversational Responses via Adversarial Information Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11194", "id": "11194", "author_site": "Yizhe Zhang, Michel Galley, Jianfeng Gao, Zhe Gan, Xiujun Li, Chris Brockett, Bill Dolan", "author": "Yizhe Zhang; Michel Galley; Jianfeng Gao; Zhe Gan; Xiujun Li; Chris Brockett; Bill Dolan", "abstract": "Responses generated by neural conversational models tend to lack informativeness and diversity. We present Adversarial Information Maximization (AIM), an adversarial learning framework that addresses these two related but distinct problems. To foster response diversity, we leverage adversarial training that allows distributional matching of synthetic and real responses. To improve informativeness, our framework explicitly optimizes a variational lower bound on pairwise mutual information between query and response. Empirical results from automatic and human evaluations demonstrate that our methods significantly boost informativeness and diversity.", "bibtex": "@inproceedings{NEURIPS2018_23ce1851,\n author = {Zhang, Yizhe and Galley, Michel and Gao, Jianfeng and Gan, Zhe and Li, Xiujun and Brockett, Chris and Dolan, Bill},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generating Informative and Diverse Conversational Responses via Adversarial Information Maximization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/23ce1851341ec1fa9e0c259de10bf87c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/23ce1851341ec1fa9e0c259de10bf87c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/23ce1851341ec1fa9e0c259de10bf87c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/23ce1851341ec1fa9e0c259de10bf87c-Reviews.html", "metareview": "", "pdf_size": 683866, "gs_citation": 326, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8800971352933292772&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/23ce1851341ec1fa9e0c259de10bf87c-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Redmond", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generative Neural Machine Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11151", "id": "11151", "author_site": "Harshil Shah, David Barber", "author": "Harshil Shah; David Barber", "abstract": "We introduce Generative Neural Machine Translation (GNMT), a latent variable architecture which is designed to model the semantics of the source and target sentences. We modify an encoder-decoder translation model by adding a latent variable as a language agnostic representation which is encouraged to learn the meaning of the sentence. GNMT achieves competitive BLEU scores on pure translation tasks, and is superior when there are missing words in the source sentence. We augment the model to facilitate multilingual translation and semi-supervised learning without adding parameters. This framework significantly reduces overfitting when there is limited paired data available, and is effective for translating between pairs of languages not seen during training.", "bibtex": "@inproceedings{NEURIPS2018_e4bb4c51,\n author = {Shah, Harshil and Barber, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generative Neural Machine Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e4bb4c5173c2ce17fd8fcd40041c068f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e4bb4c5173c2ce17fd8fcd40041c068f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e4bb4c5173c2ce17fd8fcd40041c068f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e4bb4c5173c2ce17fd8fcd40041c068f-Reviews.html", "metareview": "", "pdf_size": 170773, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=656975193760341377&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University College London+Alan Turing Institute+reinfer.io; University College London+Alan Turing Institute+reinfer.io", "aff_domain": "; ", "email": "; ", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e4bb4c5173c2ce17fd8fcd40041c068f-Abstract.html", "aff_unique_index": "0+1+2;0+1+2", "aff_unique_norm": "University College London;Alan Turing Institute;Reinfer", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.turing.ac.uk;https://www.reinfer.io", "aff_unique_abbr": "UCL;ATI;", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0+1;0+0+1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Generative Probabilistic Novelty Detection with Adversarial Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11658", "id": "11658", "author_site": "Stanislav Pidhorskyi, Ranya Almohsen, Gianfranco Doretto", "author": "Stanislav Pidhorskyi; Ranya Almohsen; Gianfranco Doretto", "abstract": "Novelty detection is the problem of identifying whether a new data point is considered to be an inlier or an outlier. We assume that training data is available to describe only the inlier distribution. Recent approaches primarily leverage deep encoder-decoder network architectures to compute a reconstruction error that is used to either compute a novelty score or to train a one-class classifier. While we too leverage a novel network of that kind, we take a probabilistic approach and effectively compute how likely it is that a sample was generated by the inlier distribution. We achieve this with two main contributions. First, we make the computation of the novelty probability feasible because we linearize the parameterized manifold capturing the underlying structure of the inlier distribution, and show how the probability factorizes and can be computed with respect to local coordinates of the manifold tangent space. Second, we improve the training of the autoencoder network. An extensive set of results show that the approach achieves state-of-the-art performance on several benchmark datasets.", "bibtex": "@inproceedings{NEURIPS2018_5421e013,\n author = {Pidhorskyi, Stanislav and Almohsen, Ranya and Doretto, Gianfranco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generative Probabilistic Novelty Detection with Adversarial Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5421e013565f7f1afa0cfe8ad87a99ab-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5421e013565f7f1afa0cfe8ad87a99ab-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5421e013565f7f1afa0cfe8ad87a99ab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5421e013565f7f1afa0cfe8ad87a99ab-Reviews.html", "metareview": "", "pdf_size": 2947305, "gs_citation": 446, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13335383760622553502&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5421e013565f7f1afa0cfe8ad87a99ab-Abstract.html" }, { "title": "Generative modeling for protein structures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11721", "id": "11721", "author_site": "Namrata Anand, Possu Huang", "author": "Namrata Anand; Possu Huang", "abstract": "Analyzing the structure and function of proteins is a key part of understanding biology at the molecular and cellular level. In addition, a major engineering challenge is to design new proteins in a principled and methodical way. Current computational modeling methods for protein design are slow and often require human oversight and intervention. Here, we apply Generative Adversarial Networks (GANs) to the task of generating protein structures, toward application in fast de novo protein design. We encode protein structures in terms of pairwise distances between alpha-carbons on the protein backbone, which eliminates the need for the generative model to learn translational and rotational symmetries. We then introduce a convex formulation of corruption-robust 3D structure recovery to fold the protein structures from generated pairwise distance maps, and solve these problems using the Alternating Direction Method of Multipliers. We test the effectiveness of our models by predicting completions of corrupted protein structures and show that the method is capable of quickly producing structurally plausible solutions.", "bibtex": "@inproceedings{NEURIPS2018_afa299a4,\n author = {Anand, Namrata and Huang, Possu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generative modeling for protein structures},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/afa299a4d1d8c52e75dd8a24c3ce534f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/afa299a4d1d8c52e75dd8a24c3ce534f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/afa299a4d1d8c52e75dd8a24c3ce534f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/afa299a4d1d8c52e75dd8a24c3ce534f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/afa299a4d1d8c52e75dd8a24c3ce534f-Reviews.html", "metareview": "", "pdf_size": 6169264, "gs_citation": 224, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9397265014771665250&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Bioengineering Department, Stanford; Bioengineering Department, Stanford", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/afa299a4d1d8c52e75dd8a24c3ce534f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Bioengineering Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Genetic-Gated Networks for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11188", "id": "11188", "author_site": "Simyung Chang, John Yang, Jaeseok Choi, Nojun Kwak", "author": "Simyung Chang; John Yang; Jaeseok Choi; Nojun Kwak", "abstract": "We introduce the Genetic-Gated Networks (G2Ns), simple neural networks that combine a gate vector composed of binary genetic genes in the hidden layer(s) of networks. Our method can take advantage of both gradient-free and gradient-based optimization methods, of which the former is effective for problems with multiple local minima, while the latter can quickly find local minima. In addition, multiple chromosomes can define different models, making it easy to construct multiple models, and the method can be effectively applied to problems that require multiple models. We show that this G2N can be applied to typical reinforcement learning algorithms to achieve a large improvement in sample efficiency and performance.", "bibtex": "@inproceedings{NEURIPS2018_d516b136,\n author = {Chang, Simyung and Yang, John and Choi, Jaeseok and Kwak, Nojun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Genetic-Gated Networks for Deep Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d516b13671a4179d9b7b458a6ebdeb92-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d516b13671a4179d9b7b458a6ebdeb92-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d516b13671a4179d9b7b458a6ebdeb92-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d516b13671a4179d9b7b458a6ebdeb92-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d516b13671a4179d9b7b458a6ebdeb92-Reviews.html", "metareview": "", "pdf_size": 1306162, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2975854598446947593&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Seoul National University + Samsung Electronics; Seoul National University; Seoul National University; Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d516b13671a4179d9b7b458a6ebdeb92-Abstract.html", "aff_unique_index": "0+1;0;0;0", "aff_unique_norm": "Seoul National University;Samsung", "aff_unique_dep": ";Samsung Electronics", "aff_unique_url": "https://www.snu.ac.kr;https://www.samsung.com", "aff_unique_abbr": "SNU;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Geometrically Coupled Monte Carlo Sampling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11046", "id": "11046", "author_site": "Mark Rowland, Krzysztof Choromanski, Fran\u00e7ois Chalus, Aldo Pacchiano, Tamas Sarlos, Richard Turner, Adrian Weller", "author": "Mark Rowland; Krzysztof M Choromanski; Fran\u00e7ois Chalus; Aldo Pacchiano; Tamas Sarlos; Richard E Turner; Adrian Weller", "abstract": "Monte Carlo sampling in high-dimensional, low-sample settings is important in many machine learning tasks. We improve current methods for sampling in Euclidean spaces by avoiding independence, and instead consider ways to couple samples. We show fundamental connections to optimal transport theory, leading to novel sampling algorithms, and providing new theoretical grounding for existing strategies. We compare our new strategies against prior methods for improving sample efficiency, including QMC, by studying discrepancy. We explore our findings empirically, and observe benefits of our sampling schemes for reinforcement learning and generative modelling.", "bibtex": "@inproceedings{NEURIPS2018_b3e3e393,\n author = {Rowland, Mark and Choromanski, Krzysztof M and Chalus, Fran\\c{c}ois and Pacchiano, Aldo and Sarlos, Tamas and Turner, Richard E and Weller, Adrian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Geometrically Coupled Monte Carlo Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b3e3e393c77e35a4a3f3cbd1e429b5dc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b3e3e393c77e35a4a3f3cbd1e429b5dc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b3e3e393c77e35a4a3f3cbd1e429b5dc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b3e3e393c77e35a4a3f3cbd1e429b5dc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b3e3e393c77e35a4a3f3cbd1e429b5dc-Reviews.html", "metareview": "", "pdf_size": 444652, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5458227057172277078&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Cambridge; Google Brain Robotics; University of Cambridge; University of California, Berkeley; Google Research; University of Cambridge; University of Cambridge + Alan Turing Institute", "aff_domain": "cam.ac.uk;google.com;gmail.com;berkeley.edu;google.com;cam.ac.uk;cam.ac.uk", "email": "cam.ac.uk;google.com;gmail.com;berkeley.edu;google.com;cam.ac.uk;cam.ac.uk", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b3e3e393c77e35a4a3f3cbd1e429b5dc-Abstract.html", "aff_unique_index": "0;1;0;2;1;0;0+3", "aff_unique_norm": "University of Cambridge;Google;University of California, Berkeley;Alan Turing Institute", "aff_unique_dep": ";Google Brain Robotics;;", "aff_unique_url": "https://www.cam.ac.uk;https://ai.google;https://www.berkeley.edu;https://www.turing.ac.uk", "aff_unique_abbr": "Cambridge;Google Brain Robotics;UC Berkeley;ATI", "aff_campus_unique_index": "0;1;0;2;1;0;0", "aff_campus_unique": "Cambridge;Mountain View;Berkeley;", "aff_country_unique_index": "0;1;0;1;1;0;0+0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Geometry Based Data Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11156", "id": "11156", "author_site": "Ofir Lindenbaum, Jay Stanley, Guy Wolf, Smita Krishnaswamy", "author": "Ofir Lindenbaum; Jay Stanley; Guy Wolf; Smita Krishnaswamy", "abstract": "We propose a new type of generative model for high-dimensional data that learns a manifold geometry of the data, rather than density, and can generate points evenly along this manifold. This is in contrast to existing generative models that represent data density, and are strongly affected by noise and other artifacts of data collection. We demonstrate how this approach corrects sampling biases and artifacts, and thus improves several downstream data analysis tasks, such as clustering and classification. Finally, we demonstrate that this approach is especially useful in biology where, despite the advent of single-cell technologies, rare subpopulations and gene-interaction relationships are affected by biased sampling. We show that SUGAR can generate hypothetical populations, and it is able to reveal intrinsic patterns and mutual-information relationships between genes on a single-cell RNA sequencing dataset of hematopoiesis.", "bibtex": "@inproceedings{NEURIPS2018_c8ed21db,\n author = {Lindenbaum, Ofir and Stanley, Jay and Wolf, Guy and Krishnaswamy, Smita},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K.
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Geometry Based Data Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c8ed21db4f678f3b13b9d5ee16489088-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c8ed21db4f678f3b13b9d5ee16489088-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c8ed21db4f678f3b13b9d5ee16489088-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c8ed21db4f678f3b13b9d5ee16489088-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c8ed21db4f678f3b13b9d5ee16489088-Reviews.html", "metareview": "", "pdf_size": 2571255, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5425940571787202010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Applied Mathematics Program, Yale University; Computational Biology & Bioinformatics Program, Yale University; Applied Mathematics Program, Yale University; Departments of Genetics & Computer Science, Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu;yale.edu", "email": "yale.edu;yale.edu;yale.edu;yale.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c8ed21db4f678f3b13b9d5ee16489088-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yale University", "aff_unique_dep": "Applied Mathematics Program", "aff_unique_url": "https://www.yale.edu", "aff_unique_abbr": "Yale", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "New Haven;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometry-Aware Recurrent Neural Networks for Active Visual Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11497", "id": "11497", "author_site": "Ricson Cheng, Ziyan Wang, Katerina Fragkiadaki", "author": "Ricson Cheng; Ziyan Wang; Katerina Fragkiadaki", "abstract": "We present recurrent geometry-aware neural networks that integrate visual information across multiple views of a scene into 3D latent feature tensors, while\nmaintaining a one-to-one mapping between 3D physical locations in the world\nscene and latent feature locations. Object detection, object segmentation, and 3D\nreconstruction are then carried out directly using the constructed 3D feature memory,\nas opposed to any of the input 2D images. The proposed models are equipped\nwith differentiable egomotion-aware feature warping and (learned) depth-aware\nunprojection operations to achieve geometrically consistent mapping between the\nfeatures in the input frame and the constructed latent model of the scene. We\nempirically show the proposed model generalizes much better than geometry-\nunaware LSTM/GRU networks, especially under the presence of multiple objects\nand cross-object occlusions. Combined with active view selection policies, our\nmodel learns to select informative viewpoints to integrate information from, by\n\u201cundoing\" cross-object occlusions, seamlessly combining geometry with learning\nfrom experience.", "bibtex": "@inproceedings{NEURIPS2018_8c9f32e0,\n author = {Cheng, Ricson and Wang, Ziyan and Fragkiadaki, Katerina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Geometry-Aware Recurrent Neural Networks for Active Visual Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8c9f32e03aeb2e3000825c8c875c4edd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8c9f32e03aeb2e3000825c8c875c4edd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8c9f32e03aeb2e3000825c8c875c4edd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8c9f32e03aeb2e3000825c8c875c4edd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8c9f32e03aeb2e3000825c8c875c4edd-Reviews.html", "metareview": "", "pdf_size": 3573184, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6659107644378330676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "email": "andrew.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8c9f32e03aeb2e3000825c8c875c4edd-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Global Convergence of Langevin Dynamics Based Algorithms for Nonconvex Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11317", "id": "11317", "author_site": "Pan Xu, Jinghui Chen, Difan Zou, Quanquan Gu", "author": "Pan Xu; Jinghui Chen; Difan Zou; Quanquan Gu", "abstract": "We present a unified framework to analyze the global convergence of Langevin dynamics based algorithms for nonconvex finite-sum optimization with $n$ component functions. At the core of our analysis is a direct analysis of the ergodicity of the numerical approximations to Langevin dynamics, which leads to faster convergence rates. Specifically, we show that gradient Langevin dynamics (GLD) and stochastic gradient Langevin dynamics (SGLD) converge to the \\textit{almost minimizer}\\footnote{Following \\citet{raginsky2017non}, an almost minimizer is defined to be a point which is within the ball of the global minimizer with radius $O(d\\log(\\beta+1)/\\beta)$, where $d$ is the problem dimension and $\\beta$ is the inverse temperature parameter.} within $\\tilde O\\big(nd/(\\lambda\\epsilon) \\big)$\\footnote{$\\tilde O(\\cdot)$ notation hides polynomials of logarithmic terms and constants.} and $\\tilde O\\big(d^7/(\\lambda^5\\epsilon^5) \\big)$ stochastic gradient evaluations respectively, where $d$ is the problem dimension, and $\\lambda$ is the spectral gap of the Markov chain generated by GLD. Both results improve upon the best known gradient complexity\\footnote{Gradient complexity is defined as the total number of stochastic gradient evaluations of an algorithm, which is the number of stochastic gradients calculated per iteration times the total number of iterations.} results \\citep{raginsky2017non}. 
\nFurthermore, for the first time we prove the global convergence guarantee for variance reduced stochastic gradient Langevin dynamics (VR-SGLD) to the almost minimizer within $\tilde O\big(\sqrt{n}d^5/(\lambda^4\epsilon^{5/2})\big)$ stochastic gradient evaluations, which outperforms the gradient complexities of GLD and SGLD in a wide regime. \nOur theoretical analyses shed some light on using Langevin dynamics based algorithms for nonconvex optimization with provable guarantees.", "bibtex": "@inproceedings{NEURIPS2018_9c19a2aa,\n author = {Xu, Pan and Chen, Jinghui and Zou, Difan and Gu, Quanquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Convergence of Langevin Dynamics Based Algorithms for Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9c19a2aa1d84e04b0bd4bc888792bd1e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9c19a2aa1d84e04b0bd4bc888792bd1e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9c19a2aa1d84e04b0bd4bc888792bd1e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9c19a2aa1d84e04b0bd4bc888792bd1e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9c19a2aa1d84e04b0bd4bc888792bd1e-Reviews.html", "metareview": "", "pdf_size": 1104647, "gs_citation": 225, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2510012428035526671&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, UCLA; Department of Computer Science, University of Virginia; Department of Computer Science, UCLA; Department of Computer Science, UCLA", "aff_domain": "cs.ucla.edu;virginia.edu;cs.ucla.edu;cs.ucla.edu", "email": "cs.ucla.edu;virginia.edu;cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9c19a2aa1d84e04b0bd4bc888792bd1e-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Los Angeles;University of Virginia", "aff_unique_dep": "Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.ucla.edu;https://www.virginia.edu", "aff_unique_abbr": "UCLA;UVA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Global Gated Mixture of Second-order Pooling for Improving Deep Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11145", "id": "11145", "author_site": "Qilong Wang, Zilin Gao, Jiangtao Xie, Wangmeng Zuo, Peihua Li", "author": "Qilong Wang; Zilin Gao; Jiangtao Xie; Wangmeng Zuo; Peihua Li", "abstract": "In most existing deep convolutional neural networks (CNNs) for classification, global average (first-order) pooling (GAP) has become a standard module to summarize activations of the last convolution layer as the final representation for prediction. Recent research shows that the integration of higher-order pooling (HOP) methods clearly improves the performance of deep CNNs.
However, both GAP and existing HOP methods assume unimodal distributions, which cannot fully capture statistics of convolutional activations, limiting the representation ability of deep CNNs, especially for samples with complex contents. To overcome the above limitation, this paper proposes a global Gated Mixture of Second-order Pooling (GM-SOP) method to further improve the representation ability of deep CNNs. To this end, we introduce a sparsity-constrained gating mechanism and propose a novel parametric SOP as a component of the mixture model. Given a bank of SOP candidates, our method can adaptively choose Top-K (K > 1) candidates for each input sample through the sparsity-constrained gating module, and performs a weighted sum of the outputs of the K selected candidates as the representation of the sample. The proposed GM-SOP can flexibly accommodate a large number of personalized SOP candidates in an efficient way, leading to richer representations. The deep networks with our GM-SOP can be end-to-end trained, having potential to characterize complex, multi-modal distributions. The proposed method is evaluated on two large scale image benchmarks (i.e., downsampled ImageNet-1K and Places365), and experimental results show our GM-SOP is superior to its counterparts and achieves very competitive performance. The source code will be available at http://www.peihuali.org/GM-SOP.", "bibtex": "@inproceedings{NEURIPS2018_17c276c8,\n author = {Wang, Qilong and Gao, Zilin and Xie, Jiangtao and Zuo, Wangmeng and Li, Peihua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Gated Mixture of Second-order Pooling for Improving Deep Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/17c276c8e723eb46aef576537e9d56d0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/17c276c8e723eb46aef576537e9d56d0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/17c276c8e723eb46aef576537e9d56d0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/17c276c8e723eb46aef576537e9d56d0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/17c276c8e723eb46aef576537e9d56d0-Reviews.html", "metareview": "", "pdf_size": 473757, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12539085796049951238&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Tianjin University+Dalian University of Technology; Dalian University of Technology; Dalian University of Technology; Harbin Institute of Technology; Dalian University of Technology", "aff_domain": "tju.edu.cn;mail.dlut.edu.cn;mail.dlut.edu.cn;hit.edu.cn;dlut.edu.cn", "email": "tju.edu.cn;mail.dlut.edu.cn;mail.dlut.edu.cn;hit.edu.cn;dlut.edu.cn", "github": "", "project": "http://www.peihuali.org/GM-SOP", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/17c276c8e723eb46aef576537e9d56d0-Abstract.html", "aff_unique_index": "0+1;1;1;2;1", "aff_unique_norm": "Tianjin University;Dalian University of Technology;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;http://www.dlut.edu.cn/;http://www.hit.edu.cn/", "aff_unique_abbr": "TJU;DUT;HIT", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Harbin", "aff_country_unique_index":
"0+0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Global Geometry of Multichannel Sparse Blind Deconvolution on the Sphere", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11132", "id": "11132", "author_site": "Yanjun Li, Yoram Bresler", "author": "Yanjun Li; Yoram Bresler", "abstract": "Multichannel blind deconvolution is the problem of recovering an unknown signal $f$ and multiple unknown channels $x_i$ from convolutional measurements $y_i=x_i \\circledast f$ ($i=1,2,\\dots,N$). We consider the case where the $x_i$'s are sparse, and convolution with $f$ is invertible. Our nonconvex optimization formulation solves for a filter $h$ on the unit sphere that produces sparse output $y_i\\circledast h$. Under some technical assumptions, we show that all local minima of the objective function correspond to the inverse filter of $f$ up to an inherent sign and shift ambiguity, and all saddle points have strictly negative curvatures. This geometric structure allows successful recovery of $f$ and $x_i$ using a simple manifold gradient descent algorithm with random initialization. Our theoretical findings are complemented by numerical experiments, which demonstrate superior performance of the proposed approach over the previous methods.", "bibtex": "@inproceedings{NEURIPS2018_c3992e9a,\n author = {Li, Yanjun and Bresler, Yoram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Geometry of Multichannel Sparse Blind Deconvolution on the Sphere},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c3992e9a68c5ae12bd18488bc579b30d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c3992e9a68c5ae12bd18488bc579b30d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c3992e9a68c5ae12bd18488bc579b30d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c3992e9a68c5ae12bd18488bc579b30d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c3992e9a68c5ae12bd18488bc579b30d-Reviews.html", "metareview": "", "pdf_size": 603431, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9514734665361715109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "CSL and Department of ECE, University of Illinois Urbana-Champaign; CSL and Department of ECE, University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c3992e9a68c5ae12bd18488bc579b30d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Global Non-convex Optimization with Discretized Diffusions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11919", "id": "11919", "author_site": "Murat Erdogdu, Lester Mackey, Ohad Shamir", "author": "Murat A Erdogdu; Lester Mackey; Ohad Shamir", 
"abstract": "An Euler discretization of the Langevin diffusion is known to converge to the global minimizers of certain convex and non-convex optimization problems. We show that this property holds for any suitably smooth diffusion and that different diffusions are suitable for optimizing different classes of convex and non-convex functions. This allows us to design diffusions suitable for globally optimizing convex and non-convex functions not covered by the existing Langevin theory. Our non-asymptotic analysis delivers computable optimization and integration error bounds based on easily accessed properties of the objective and chosen diffusion. Central to our approach are new explicit Stein factor bounds on the solutions of Poisson equations. We complement these results with improved optimization guarantees for targets other than the standard Gibbs measure.", "bibtex": "@inproceedings{NEURIPS2018_3ffebb08,\n author = {Erdogdu, Murat A and Mackey, Lester and Shamir, Ohad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Non-convex Optimization with Discretized Diffusions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3ffebb08d23c609875d7177ee769a3e9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3ffebb08d23c609875d7177ee769a3e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3ffebb08d23c609875d7177ee769a3e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3ffebb08d23c609875d7177ee769a3e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3ffebb08d23c609875d7177ee769a3e9-Reviews.html", "metareview": "", "pdf_size": 1563679, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6258201951049860638&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "University of Toronto+Vector Institute; Microsoft Research; Weizmann Institute of Science", "aff_domain": "cs.toronto.edu;microsoft.com;weizmann.ac.il", "email": "cs.toronto.edu;microsoft.com;weizmann.ac.il", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3ffebb08d23c609875d7177ee769a3e9-Abstract.html", "aff_unique_index": "0+1;2;3", "aff_unique_norm": "University of Toronto;Vector Institute;Microsoft;Weizmann Institute of Science", "aff_unique_dep": ";;Microsoft Research;", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.microsoft.com/en-us/research;https://www.weizmann.org.il", "aff_unique_abbr": "U of T;Vector Institute;MSR;Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;1;2", "aff_country_unique": "Canada;United States;Israel" }, { "title": "Glow: Generative Flow with Invertible 1x1 Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11968", "id": "11968", "author_site": "Diederik Kingma, Prafulla Dhariwal", "author": "Diederik P. Kingma; Prafulla Dhariwal", "abstract": "Flow-based generative models are conceptually attractive due to tractability of the exact log-likelihood, tractability of exact latent-variable inference, and parallelizability of both training and synthesis. 
In this paper we propose Glow, a simple type of generative flow using invertible 1x1 convolutions. Using our method we demonstrate a significant improvement in log-likelihood and qualitative sample quality. Perhaps most strikingly, we demonstrate that a generative model optimized towards the plain log-likelihood objective is capable of efficient synthesis of large and subjectively realistic-looking images.", "bibtex": "@inproceedings{NEURIPS2018_d139db6a,\n author = {Kingma, Durk P and Dhariwal, Prafulla},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Glow: Generative Flow with Invertible 1x1 Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d139db6a236200b21cc7f752979132d0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d139db6a236200b21cc7f752979132d0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d139db6a236200b21cc7f752979132d0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d139db6a236200b21cc7f752979132d0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d139db6a236200b21cc7f752979132d0-Reviews.html", "metareview": "", "pdf_size": 4151936, "gs_citation": 3875, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5834689841973227263&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "OpenAI; OpenAI", "aff_domain": ";", "email": ";", "github": "https://github.com/openai/glow", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d139db6a236200b21cc7f752979132d0-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "OpenAI", "aff_unique_dep": "", "aff_unique_url": "https://openai.com", "aff_unique_abbr": "OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GradiVeQ: Vector Quantization for Bandwidth-Efficient Gradient Aggregation in Distributed CNN Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11501", "id": "11501", "author_site": "Mingchao Yu, Zhifeng Lin, Krishna Narra, Songze Li, Youjie Li, Nam Sung Kim, Alex Schwing, Murali Annavaram, Salman Avestimehr", "author": "Mingchao Yu; Zhifeng Lin; Krishna Narra; Songze Li; Youjie Li; Nam Sung Kim; Alexander Schwing; Murali Annavaram; Salman Avestimehr", "abstract": "Data parallelism can boost the training speed of convolutional neural networks (CNNs), but could suffer from significant communication costs caused by gradient aggregation. To alleviate this problem, several scalar quantization techniques have been developed to compress the gradients. But these techniques could perform poorly when used together with decentralized aggregation protocols like ring all-reduce (RAR), mainly due to their inability to directly aggregate compressed gradients. In this paper, we empirically demonstrate the strong linear correlations between CNN gradients, and propose a gradient vector quantization technique, named GradiVeQ, to exploit these correlations through principal component analysis (PCA) for substantial gradient dimension reduction. 
GradiVeQ enables direct aggregation of compressed gradients, and hence allows us to build a distributed learning system that parallelizes GradiVeQ gradient compression and RAR communications. Extensive experiments on popular CNNs demonstrate that applying GradiVeQ slashes the wall-clock gradient aggregation time of the original RAR by more than 5x without noticeable accuracy loss, and reduces the end-to-end training time by almost 50%. The results also show that GradiVeQ is compatible with scalar quantization techniques such as QSGD (Quantized SGD), and achieves a much higher speed-up gain under the same compression ratio.", "bibtex": "@inproceedings{NEURIPS2018_cf059682,\n author = {Yu, Mingchao and Lin, Zhifeng and Narra, Krishna and Li, Songze and Li, Youjie and Kim, Nam Sung and Schwing, Alexander and Annavaram, Murali and Avestimehr, Salman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GradiVeQ: Vector Quantization for Bandwidth-Efficient Gradient Aggregation in Distributed CNN Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cf05968255451bdefe3c5bc64d550517-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cf05968255451bdefe3c5bc64d550517-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cf05968255451bdefe3c5bc64d550517-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cf05968255451bdefe3c5bc64d550517-Reviews.html", "metareview": "", "pdf_size": 909806, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1310386150754473734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Southern California; University of Southern California; University of Southern California; University of Southern California; University of Illinois at Urbana Champaign; University of Illinois at Urbana Champaign; University of Illinois at Urbana Champaign; University of Southern California; University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;illinois.edu;illinois.edu;illinois.edu;usc.edu;usc.edu", "email": "usc.edu;usc.edu;usc.edu;usc.edu;illinois.edu;illinois.edu;illinois.edu;usc.edu;usc.edu", "github": "", "project": "", "author_num": 9, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cf05968255451bdefe3c5bc64d550517-Abstract.html", "aff_unique_index": "0;0;0;0;1;1;1;0;0", "aff_unique_norm": "University of Southern California;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://illinois.edu", "aff_unique_abbr": "USC;UIUC", "aff_campus_unique_index": "0;0;0;0;1;1;1;0;0", "aff_campus_unique": "Los Angeles;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Gradient Descent Meets Shift-and-Invert Preconditioning for Eigenvector Computation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11289", "id": "11289", "author": "Zhiqiang Xu", "abstract": "Shift-and-invert preconditioning, as a classic acceleration technique for the leading eigenvector computation, has received much attention again recently, owing to fast least-squares solvers for efficiently approximating matrix inversions in power iterations. 
In this work, we adopt an inexact Riemannian gradient descent perspective to investigate the effect of the step-size scheme on this technique. The shift-and-inverted power method is included as a special case with adaptive step-sizes. In particular, two other step-size settings, i.e., constant step-sizes and Barzilai-Borwein (BB) step-sizes, are examined theoretically and/or empirically. We present a novel convergence analysis for the constant step-size setting that achieves a rate of $\\tilde{O}(\\sqrt{\\frac{\\lambda_{1}}{\\lambda_{1}-\\lambda_{p+1}}})$, where $\\lambda_{i}$ represents the $i$-th largest eigenvalue of the given real symmetric matrix and $p$ is the multiplicity of $\\lambda_{1}$. Our experimental studies show that the proposed algorithm can be significantly faster than the shift-and-inverted power method in practice.", "bibtex": "@inproceedings{NEURIPS2018_7fb8ceb3,\n author = {Xu, Zhiqiang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gradient Descent Meets Shift-and-Invert Preconditioning for Eigenvector Computation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7fb8ceb3bd59c7956b1df66729296a4c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7fb8ceb3bd59c7956b1df66729296a4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7fb8ceb3bd59c7956b1df66729296a4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7fb8ceb3bd59c7956b1df66729296a4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7fb8ceb3bd59c7956b1df66729296a4c-Reviews.html", "metareview": "", "pdf_size": 265391, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17277833557138761457&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7fb8ceb3bd59c7956b1df66729296a4c-Abstract.html" }, { "title": "Gradient Descent for Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11159", "id": "11159", "author_site": "Dongsung Huh, Terrence Sejnowski", "author": "Dongsung Huh; Terrence J. Sejnowski", "abstract": "Most large-scale network models use neurons with static nonlinearities that produce analog output, despite the fact that information processing in the brain is predominantly carried out by dynamic neurons that produce discrete pulses called spikes. Research in spike-based computation has been impeded by the lack of an efficient supervised learning algorithm for spiking neural networks. Here, we present a gradient descent method for optimizing spiking network models by introducing a differentiable formulation of spiking dynamics and deriving the exact gradient calculation. For demonstration, we trained recurrent spiking networks on two dynamic tasks: one that requires optimizing fast (~ millisecond) spike-based interactions for efficient encoding of information, and a delayed-memory task over an extended duration (~ second). The results show that the gradient descent approach indeed optimizes network dynamics on the time scale of individual spikes as well as on behavioral time scales. 
In conclusion, our method yields a general-purpose supervised learning algorithm for spiking neural networks, which can facilitate further investigations on spike-based computations.", "bibtex": "@inproceedings{NEURIPS2018_185e65bc,\n author = {Huh, Dongsung and Sejnowski, Terrence J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gradient Descent for Spiking Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/185e65bc40581880c4f2c82958de8cfe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/185e65bc40581880c4f2c82958de8cfe-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/185e65bc40581880c4f2c82958de8cfe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/185e65bc40581880c4f2c82958de8cfe-Reviews.html", "metareview": "", "pdf_size": 1862316, "gs_citation": 500, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3067086549811480594&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Salk Institute; Salk Institute", "aff_domain": "salk.edu;salk.edu", "email": "salk.edu;salk.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/185e65bc40581880c4f2c82958de8cfe-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Salk Institute for Biological Studies", "aff_unique_dep": "", "aff_unique_url": "https://www.salk.edu", "aff_unique_abbr": "Salk Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Gradient Sparsification for Communication-Efficient Distributed Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11147", "id": "11147", "author_site": "Jianqiao Wangni, Jialei Wang, Ji Liu, Tong Zhang", "author": "Jianqiao Wangni; Jialei Wang; Ji Liu; Tong Zhang", "abstract": "Modern large-scale machine learning applications require stochastic optimization algorithms to be implemented on distributed computational architectures. A key bottleneck is the communication overhead for exchanging information such as stochastic gradients among different workers. In this paper, to reduce the communication cost, we propose a convex optimization formulation to minimize the coding length of stochastic gradients. The key idea is to randomly drop out coordinates of the stochastic gradient vectors and amplify the remaining coordinates appropriately to ensure that the sparsified gradient is unbiased. To solve the optimal sparsification efficiently, several simple and fast algorithms are proposed for an approximate solution, with a theoretical guarantee on sparsity. Experiments on $\\ell_2$ regularized logistic regression, support vector machines, and convolutional neural networks validate our sparsification approaches.", "bibtex": "@inproceedings{NEURIPS2018_3328bdf9,\n author = {Wangni, Jianqiao and Wang, Jialei and Liu, Ji and Zhang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gradient Sparsification for Communication-Efficient Distributed Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3328bdf9a4b9504b9398284244fe97c2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3328bdf9a4b9504b9398284244fe97c2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3328bdf9a4b9504b9398284244fe97c2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3328bdf9a4b9504b9398284244fe97c2-Reviews.html", "metareview": "", "pdf_size": 335589, "gs_citation": 743, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3108883396466766008&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "University of Pennsylvania + Tencent AI Lab; Two Sigma Investments; University of Rochester + Tencent AI Lab; Tencent AI Lab", "aff_domain": "seas.upenn.edu;twosigma.com;gmail.com;tongzhang-ml.org", "email": "seas.upenn.edu;twosigma.com;gmail.com;tongzhang-ml.org", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3328bdf9a4b9504b9398284244fe97c2-Abstract.html", "aff_unique_index": "0+1;2;3+1;1", "aff_unique_norm": "University of Pennsylvania;Tencent;Two Sigma Investments;University of Rochester", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "https://www.upenn.edu;https://ai.tencent.com;https://www.twosigma.com;https://www.rochester.edu", "aff_unique_abbr": "UPenn;Tencent AI Lab;Two Sigma;U of R", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+1;0;0+1;1", "aff_country_unique": "United States;China" }, { "title": "Graph Convolutional Policy Network for Goal-Directed Molecular Graph Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11620", "id": "11620", "author_site": "Jiaxuan You, Bowen Liu, Zhitao Ying, Vijay Pande, Jure Leskovec", "author": "Jiaxuan You; Bowen Liu; Zhitao Ying; Vijay Pande; Jure Leskovec", "abstract": "Generating novel graph structures that optimize given objectives while obeying some given underlying rules is fundamental for chemistry, biology and social science research. This is especially important in the task of molecular graph generation, whose goal is to discover novel molecules with desired properties such as drug-likeness and synthetic accessibility, while obeying physical laws such as chemical valency. However, designing models that find molecules that optimize desired properties while incorporating highly complex and non-differentiable rules remains a challenging task. Here we propose the Graph Convolutional Policy Network (GCPN), a general graph convolutional network-based model for goal-directed graph generation through reinforcement learning. The model is trained to optimize domain-specific rewards and adversarial loss through policy gradient, and acts in an environment that incorporates domain-specific rules. Experimental results show that GCPN can achieve a 61% improvement on chemical property optimization over state-of-the-art baselines while resembling known molecules, and a 184% improvement on the constrained property optimization task.", "bibtex": "@inproceedings{NEURIPS2018_d60678e8,\n author = {You, Jiaxuan and Liu, Bowen and Ying, Zhitao and Pande, Vijay and Leskovec, Jure},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graph Convolutional Policy Network for Goal-Directed Molecular Graph Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Reviews.html", "metareview": "", "pdf_size": 505897, "gs_citation": 1200, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15276529180320001334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Department of Computer Science; Department of Chemistry; Department of Computer Science; Department of Bioengineering; Department of Computer Science", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d60678e8f2ba9c540798ebbde31177e8-Abstract.html", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Unknown Institution;Institution not specified;Department of Bioengineering", "aff_unique_dep": "Department of Computer Science;Department of Chemistry;Bioengineering", "aff_unique_url": ";;", "aff_unique_abbr": ";;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Graph Oracle Models, Lower Bounds, and Gaps for Parallel Stochastic Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11812", "id": "11812", "author_site": "Blake Woodworth, Jialei Wang, Adam Smith, Brendan McMahan, Nati Srebro", "author": "Blake E Woodworth; Jialei Wang; Adam Smith; Brendan McMahan; Nati Srebro", "abstract": "We suggest a general oracle-based framework that captures parallel stochastic optimization in different parallelization settings described by a dependency graph, and derive generic lower bounds in terms of this graph. We then use the framework and derive lower bounds to study several specific parallel optimization settings, including delayed updates and parallel processing with intermittent communication. We highlight gaps between lower and upper bounds on the oracle complexity, and cases where the \"natural\" algorithms are not known to be optimal.", "bibtex": "@inproceedings{NEURIPS2018_3ec27c2c,\n author = {Woodworth, Blake E and Wang, Jialei and Smith, Adam and McMahan, Brendan and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graph Oracle Models, Lower Bounds, and Gaps for Parallel Stochastic Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3ec27c2cff04bc5fd2586ca36c62044e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3ec27c2cff04bc5fd2586ca36c62044e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3ec27c2cff04bc5fd2586ca36c62044e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3ec27c2cff04bc5fd2586ca36c62044e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3ec27c2cff04bc5fd2586ca36c62044e-Reviews.html", "metareview": "", "pdf_size": 430034, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18443180344188916734&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Toyota Technological Institute at Chicago; Two Sigma Investments; Boston University; Google; Toyota Technological Institute at Chicago + Google", "aff_domain": "ttic.edu;twosigma.com;bu.edu;google.com;ttic.edu", "email": "ttic.edu;twosigma.com;bu.edu;google.com;ttic.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3ec27c2cff04bc5fd2586ca36c62044e-Abstract.html", "aff_unique_index": "0;1;2;3;0+3", "aff_unique_norm": "Toyota Technological Institute at Chicago;Two Sigma Investments;Boston University;Google", "aff_unique_dep": ";;;Google", "aff_unique_url": "https://www.tti-chicago.org;https://www.twosigma.com;https://www.bu.edu;https://www.google.com", "aff_unique_abbr": "TTI Chicago;Two Sigma;BU;Google", "aff_campus_unique_index": "0;2;0+2", "aff_campus_unique": "Chicago;;Mountain View", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Graphical Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11589", "id": "11589", "author_site": "Chongxuan LI, Max Welling, Jun Zhu, Bo Zhang", "author": "Chongxuan LI; Max Welling; Jun Zhu; Bo Zhang", "abstract": "We propose Graphical Generative Adversarial Networks (Graphical-GAN) to model structured data. Graphical-GAN conjoins the power of Bayesian networks on compactly representing the dependency structures among random variables and that of generative adversarial networks on learning expressive dependency functions. We introduce a structured recognition model to infer the posterior distribution of latent variables given observations. We generalize the Expectation Propagation (EP) algorithm to learn the generative model and recognition model jointly. Finally, we present two important instances of Graphical-GAN, i.e. Gaussian Mixture GAN (GMGAN) and State Space GAN (SSGAN), which can successfully learn the discrete and temporal structures on visual datasets, respectively.", "bibtex": "@inproceedings{NEURIPS2018_c5c1cb0b,\n author = {LI, Chongxuan and Welling, Max and Zhu, Jun and Zhang, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graphical Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5c1cb0bebd56ae38817b251ad72bedb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5c1cb0bebd56ae38817b251ad72bedb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c5c1cb0bebd56ae38817b251ad72bedb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5c1cb0bebd56ae38817b251ad72bedb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5c1cb0bebd56ae38817b251ad72bedb-Reviews.html", "metareview": "", "pdf_size": 4119489, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13094733406106291079&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science & Technology, Institute for Artificial Intelligence, BNRist Center, THBI Lab, State Key Lab for Intell. Tech. & Sys., Tsinghua University; University of Amsterdam, and the Canadian Institute for Advanced Research (CIFAR); Department of Computer Science & Technology, Institute for Artificial Intelligence, BNRist Center, THBI Lab, State Key Lab for Intell. Tech. & Sys., Tsinghua University; Department of Computer Science & Technology, Institute for Artificial Intelligence, BNRist Center, THBI Lab, State Key Lab for Intell. Tech. & Sys., Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;uva.nl;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;uva.nl;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5c1cb0bebd56ae38817b251ad72bedb-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tsinghua University;University of Amsterdam", "aff_unique_dep": "Department of Computer Science & Technology;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uva.nl", "aff_unique_abbr": "THU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Netherlands" }, { "title": "Graphical model inference: Sequential Monte Carlo meets deterministic approximations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11784", "id": "11784", "author_site": "Fredrik Lindsten, Jouni Helske, Matti Vihola", "author": "Fredrik Lindsten; Jouni Helske; Matti Vihola", "abstract": "Approximate inference in probabilistic graphical models (PGMs) can be grouped into deterministic methods and Monte-Carlo-based methods. The former can often provide accurate and rapid inferences, but are typically associated with biases that are hard to quantify. The latter enjoy asymptotic consistency, but can suffer from high computational costs. In this paper we present a way of bridging the gap between deterministic and stochastic inference. Specifically, we suggest an efficient sequential Monte Carlo (SMC) algorithm for PGMs which can leverage the output from deterministic inference methods. While generally applicable, we show explicitly how this can be done with loopy belief propagation, expectation propagation, and Laplace approximations. 
The resulting algorithm can be viewed as a post-correction of the biases associated with these methods and, indeed, numerical results show clear improvements over the baseline deterministic methods as well as over \"plain\" SMC.", "bibtex": "@inproceedings{NEURIPS2018_351869bd,\n author = {Lindsten, Fredrik and Helske, Jouni and Vihola, Matti},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graphical model inference: Sequential Monte Carlo meets deterministic approximations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/351869bde8b9d6ad1e3090bd173f600d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/351869bde8b9d6ad1e3090bd173f600d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/351869bde8b9d6ad1e3090bd173f600d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/351869bde8b9d6ad1e3090bd173f600d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/351869bde8b9d6ad1e3090bd173f600d-Reviews.html", "metareview": "", "pdf_size": 259192, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14948182597999923691&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "aff": "Department of Information Technology, Uppsala University, Uppsala, Sweden; Department of Science and Technology, Link\u00f6ping University, Norrk\u00f6ping, Sweden; Department of Mathematics and Statistics, University of Jyv\u00e4skyl\u00e4, Jyv\u00e4skyl\u00e4, Finland", "aff_domain": "it.uu.se;liu.se;jyu.fi", "email": "it.uu.se;liu.se;jyu.fi", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/351869bde8b9d6ad1e3090bd173f600d-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Uppsala University;Link\u00f6ping University;University of Jyv\u00e4skyl\u00e4", "aff_unique_dep": "Department of Information Technology;Department of Science and Technology;Department of Mathematics and Statistics", "aff_unique_url": "https://www.uu.se;https://www.liu.se;https://www.jyu.fi", "aff_unique_abbr": "UU;LiU;", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Uppsala;Norrk\u00f6ping;Jyv\u00e4skyl\u00e4", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Sweden;Finland" }, { "title": "Greedy Hash: Towards Fast Optimization for Accurate Hash Coding in CNN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11102", "id": "11102", "author_site": "Shupeng Su, Chao Zhang, Kai Han, Yonghong Tian", "author": "Shupeng Su; Chao Zhang; Kai Han; Yonghong Tian", "abstract": "To convert the input into binary codes, hashing algorithms have been widely used for approximate nearest neighbor search on large-scale image sets due to their computation and storage efficiency. Deep hashing further improves the retrieval quality by combining hash coding with deep neural networks. However, a major difficulty in deep hashing lies in the discrete constraints imposed on the network output, which generally make the optimization NP-hard. In this work, we adopt the greedy principle to tackle this NP-hard problem by iteratively updating the network toward the probable optimal discrete solution in each iteration. 
A hash coding layer is designed to implement our approach, which strictly uses the sign function in forward propagation to maintain the discrete constraints, while in back propagation the gradients are transmitted intact to the preceding layer to avoid vanishing gradients. In addition to the theoretical derivation, we provide a new perspective to visualize and understand the effectiveness and efficiency of our algorithm. Experiments on benchmark datasets show that our scheme outperforms state-of-the-art hashing methods in both supervised and unsupervised tasks.", "bibtex": "@inproceedings{NEURIPS2018_13f3cf8c,\n author = {Su, Shupeng and Zhang, Chao and Han, Kai and Tian, Yonghong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Greedy Hash: Towards Fast Optimization for Accurate Hash Coding in CNN},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/13f3cf8c531952d72e5847c4183e6910-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/13f3cf8c531952d72e5847c4183e6910-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/13f3cf8c531952d72e5847c4183e6910-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/13f3cf8c531952d72e5847c4183e6910-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/13f3cf8c531952d72e5847c4183e6910-Reviews.html", "metareview": "", "pdf_size": 359916, "gs_citation": 253, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5080578763427257320&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Key Laboratory of Machine Perception (MOE), School of EECS, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + National Engineering Laboratory for Video Technology, School of EECS, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + Huawei Noah\u2019s Ark Lab; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + National Engineering Laboratory for Video Technology, School of EECS, Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/13f3cf8c531952d72e5847c4183e6910-Abstract.html", "aff_unique_index": "0;0+0;0+1;0+0", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": "School of EECS;Noah\u2019s Ark Lab", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "PKU;Huawei", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0+0;0+0", "aff_country_unique": "China" }, { "title": "Group Equivariant Capsule Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11844", "id": "11844", "author_site": "Jan Eric Lenssen, Matthias Fey, Pascal Libuschewski", "author": "Jan Eric Lenssen; Matthias Fey; Pascal Libuschewski", "abstract": "We present group equivariant capsule networks, a framework to introduce guaranteed equivariance and invariance properties to the capsule network idea. Our work can be divided into two contributions. 
First, we present a generic routing by agreement algorithm defined on elements of a group and prove that equivariance of output pose vectors, as well as invariance of output activations, hold under certain conditions. Second, we connect the resulting equivariant capsule networks with work from the field of group convolutional networks. Through this connection, we provide intuition about how the two methods relate, and are able to combine the strengths of both approaches in one deep neural network architecture. The resulting framework allows sparse evaluation of the group convolution operator, provides control over specific equivariance and invariance properties, and can use routing by agreement instead of pooling operations. In addition, it is able to provide interpretable and equivariant representation vectors as output capsules, which disentangle evidence of object existence from its pose.", "bibtex": "@inproceedings{NEURIPS2018_c7d0e7e2,\n author = {Lenssen, Jan Eric and Fey, Matthias and Libuschewski, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Group Equivariant Capsule Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c7d0e7e2922845f3e1185d246d01365d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c7d0e7e2922845f3e1185d246d01365d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c7d0e7e2922845f3e1185d246d01365d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c7d0e7e2922845f3e1185d246d01365d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c7d0e7e2922845f3e1185d246d01365d-Reviews.html", "metareview": "", "pdf_size": 906425, "gs_citation": 157, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11608023930229611825&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c7d0e7e2922845f3e1185d246d01365d-Abstract.html" }, { "title": "GroupReduce: Block-Wise Low-Rank Approximation for Neural Language Model Shrinking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12038", "id": "12038", "author_site": "Patrick Chen, Si Si, Yang Li, Ciprian Chelba, Cho-Jui Hsieh", "author": "Patrick Chen; Si Si; Yang Li; Ciprian Chelba; Cho-Jui Hsieh", "abstract": "Model compression is essential for serving large deep neural nets on devices with limited resources or applications that require real-time responses. For advanced NLP problems, a neural language model usually consists of recurrent layers (e.g., using LSTM cells), an embedding matrix for representing input tokens, and a softmax layer for generating output tokens. For problems with a very large vocabulary size, the embedding and the softmax matrices can account for more than half of the model size. For instance, the bigLSTM model achieves state-of-the-art performance on the One-Billion-Word (OBW) dataset with around 800k vocabulary, and its word embedding and softmax matrices use more than 6GBytes space, and are responsible for over 90\\% of the model parameters. 
In this paper, we propose GroupReduce, a novel compression method for neural language models, based on vocabulary-partition (block) based low-rank matrix approximation and the inherent frequency distribution of tokens (the power-law distribution of words). We start by grouping words into $c$ blocks based on their frequency, and then refine the clustering iteratively by constructing a weighted low-rank approximation for each block, where the weights are based on the frequencies of the words in the block. The experimental results show that our method can significantly outperform traditional compression methods such as low-rank approximation and pruning. On the OBW dataset, our method achieved a 6.6x compression rate for the embedding and softmax matrices, and when combined with quantization, it can achieve a 26x compression rate without losing prediction accuracy.", "bibtex": "@inproceedings{NEURIPS2018_a2b8a85a,\n author = {Chen, Patrick and Si, Si and Li, Yang and Chelba, Ciprian and Hsieh, Cho-Jui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GroupReduce: Block-Wise Low-Rank Approximation for Neural Language Model Shrinking},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a2b8a85a29b2d64ad6f47275bf1360c6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a2b8a85a29b2d64ad6f47275bf1360c6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a2b8a85a29b2d64ad6f47275bf1360c6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a2b8a85a29b2d64ad6f47275bf1360c6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a2b8a85a29b2d64ad6f47275bf1360c6-Reviews.html", "metareview": "", "pdf_size": 332737, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4115945175695069166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "UCLA; Google Research; Google Research; Google Research; UCLA", "aff_domain": "g.ucla.edu;google.com;google.com;google.com;cs.ucla.edu", "email": "g.ucla.edu;google.com;google.com;google.com;cs.ucla.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a2b8a85a29b2d64ad6f47275bf1360c6-Abstract.html", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of California, Los Angeles;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.ucla.edu;https://research.google", "aff_unique_abbr": "UCLA;Google Research", "aff_campus_unique_index": "0;1;1;1;0", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "GumBolt: Extending Gumbel trick to Boltzmann priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11403", "id": "11403", "author_site": "Amir H Khoshaman, Mohammad Amin", "author": "Amir H Khoshaman; Mohammad Amin", "abstract": "Boltzmann machines (BMs) are appealing candidates for powerful priors in variational autoencoders (VAEs), as they are capable of capturing nontrivial and multi-modal distributions over discrete variables. However, non-differentiability of the discrete units prohibits using the reparameterization trick, essential for low-noise back propagation. 
The Gumbel trick resolves this problem in a consistent way by relaxing the variables and distributions, but it is incompatible with BM priors. Here, we propose the GumBolt, a model that extends the Gumbel trick to BM priors in VAEs. GumBolt is significantly simpler than the recently proposed methods with BM priors and outperforms them by a considerable margin. It achieves state-of-the-art performance on the permutation-invariant MNIST and OMNIGLOT datasets among models with only discrete latent variables. Moreover, the performance can be further improved by allowing multi-sampled (importance-weighted) estimation of the log-likelihood in training, which was not possible with previous models.", "bibtex": "@inproceedings{NEURIPS2018_a00e5eb0,\n author = {Khoshaman, Amir H and Amin, Mohammad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GumBolt: Extending Gumbel trick to Boltzmann priors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a00e5eb0973d24649a4a920fc53d9564-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a00e5eb0973d24649a4a920fc53d9564-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a00e5eb0973d24649a4a920fc53d9564-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a00e5eb0973d24649a4a920fc53d9564-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a00e5eb0973d24649a4a920fc53d9564-Reviews.html", "metareview": "", "pdf_size": 564665, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10024846911113795001&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "D-Wave Systems Inc.+Borealis AI; D-Wave Systems Inc.+Simon Fraser University", "aff_domain": "gmail.com;dwavesys.com", "email": "gmail.com;dwavesys.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a00e5eb0973d24649a4a920fc53d9564-Abstract.html", "aff_unique_index": "0+1;0+2", "aff_unique_norm": "D-Wave Systems Inc.;Borealis AI;Simon Fraser University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dwavesys.com;https://www.borealisai.com;https://www.sfu.ca", "aff_unique_abbr": "D-Wave;Borealis AI;SFU", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0", "aff_country_unique": "Canada" }, { "title": "HOGWILD!-Gibbs can be PanAccurate", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11031", "id": "11031", "author_site": "Constantinos Daskalakis, Nishanth Dikkala, Siddhartha Jayanti", "author": "Constantinos Daskalakis; Nishanth Dikkala; Siddhartha Jayanti", "abstract": "Asynchronous Gibbs sampling has been recently shown to be fast-mixing and an accurate method for estimating probabilities of events on a small number of variables of a graphical model satisfying Dobrushin's condition~\\cite{DeSaOR16}. We investigate whether it can be used to accurately estimate expectations of functions of {\\em all the variables} of the model. 
Under the same condition, we show that the synchronous (sequential) and asynchronous Gibbs samplers can be coupled so that the expected Hamming distance between their (multivariate) samples remains bounded by $O(\\tau \\log n),$ where $n$ is the number of variables in the graphical model, and $\\tau$ is a measure of the asynchronicity. A similar bound holds for any constant power of the Hamming distance. Hence, the expectation of any function that is Lipschitz with respect to a power of the Hamming distance, can be estimated with a bias that grows logarithmically in $n$. Going beyond Lipschitz functions, we consider the bias arising from asynchronicity in estimating the expectation of polynomial functions of all variables in the model. Using recent concentration of measure results~\\cite{DaskalakisDK17,GheissariLP17,GotzeSS18}, we show that the bias introduced by the asynchronicity is of smaller order than the standard deviation of the function value already present in the true model. We perform experiments on a multi-processor machine to empirically illustrate our theoretical findings.", "bibtex": "@inproceedings{NEURIPS2018_a5bfc9e0,\n author = {Daskalakis, Constantinos and Dikkala, Nishanth and Jayanti, Siddhartha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {HOGWILD!-Gibbs can be PanAccurate},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a5bfc9e07964f8dddeb95fc584cd965d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a5bfc9e07964f8dddeb95fc584cd965d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a5bfc9e07964f8dddeb95fc584cd965d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a5bfc9e07964f8dddeb95fc584cd965d-Reviews.html", "metareview": "", "pdf_size": 383605, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11589098135494742868&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 10, "aff": "EECS & CSAIL, MIT; EECS & CSAIL, MIT; EECS & CSAIL, MIT", "aff_domain": "csail.mit.edu;csail.mit.edu;mit.edu", "email": "csail.mit.edu;csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a5bfc9e07964f8dddeb95fc584cd965d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Electrical Engineering & Computer Science and Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "HOUDINI: Lifelong Learning as Program Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11830", "id": "11830", "author_site": "Lazar Valkov, Dipak Chaudhari, Akash Srivastava, Charles Sutton, Swarat Chaudhuri", "author": "Lazar Valkov; Dipak Chaudhari; Akash Srivastava; Charles Sutton; Swarat Chaudhuri", "abstract": "We present a neurosymbolic framework for the lifelong learning of algorithmic tasks that mix perception and procedural reasoning. 
Reusing high-level concepts across domains and learning complex procedures are key challenges in lifelong learning. We show that a program synthesis approach that combines gradient descent with combinatorial search over programs can be a more effective response to these challenges than purely neural methods. Our framework, called HOUDINI, represents neural networks as strongly typed, differentiable functional programs that use symbolic higher-order combinators to compose a library of neural functions. Our learning algorithm consists of: (1) a symbolic program synthesizer that performs a type-directed search over parameterized programs, and decides on the library functions to reuse, and the architectures to combine them, while learning a sequence of tasks; and (2) a neural module that trains these programs using stochastic gradient descent. We evaluate HOUDINI on three benchmarks that combine perception with the algorithmic tasks of counting, summing, and shortest-path computation. Our experiments show that HOUDINI transfers high-level concepts more effectively than traditional transfer learning and progressive neural networks, and that the typed representation of networks significantly accelerates the search.", "bibtex": "@inproceedings{NEURIPS2018_edc27f13,\n author = {Valkov, Lazar and Chaudhari, Dipak and Srivastava, Akash and Sutton, Charles and Chaudhuri, Swarat},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {HOUDINI: Lifelong Learning as Program Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/edc27f139c3b4e4bb29d1cdbc45663f9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/edc27f139c3b4e4bb29d1cdbc45663f9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/edc27f139c3b4e4bb29d1cdbc45663f9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/edc27f139c3b4e4bb29d1cdbc45663f9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/edc27f139c3b4e4bb29d1cdbc45663f9-Reviews.html", "metareview": "", "pdf_size": 684248, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10841457222027435818&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "University of Edinburgh; Rice University; University of Edinburgh; University of Edinburgh + The Alan Turing Institute + Google Brain; Rice University", "aff_domain": "sms.ed.ac.uk;rice.edu;ed.ac.uk;google.com;rice.edu", "email": "sms.ed.ac.uk;rice.edu;ed.ac.uk;google.com;rice.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/edc27f139c3b4e4bb29d1cdbc45663f9-Abstract.html", "aff_unique_index": "0;1;0;0+2+3;1", "aff_unique_norm": "University of Edinburgh;Rice University;Alan Turing Institute;Google", "aff_unique_dep": ";;;Google Brain", "aff_unique_url": "https://www.ed.ac.uk;https://www.rice.edu;https://www.turing.ac.uk;https://brain.google.com", "aff_unique_abbr": "Edinburgh;Rice;ATI;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0+0+1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Hamiltonian Variational Auto-Encoder", "status": "Poster", "track": "main", "site": 
"https://nips.cc/virtual/2018/poster/11782", "id": "11782", "author_site": "Anthony Caterini, Arnaud Doucet, Dino Sejdinovic", "author": "Anthony L Caterini; Arnaud Doucet; Dino Sejdinovic", "abstract": "Variational Auto-Encoders (VAE) have become very popular techniques to perform\ninference and learning in latent variable models as they allow us to leverage the rich\nrepresentational power of neural networks to obtain flexible approximations of the\nposterior of latent variables as well as tight evidence lower bounds (ELBO). Com-\nbined with stochastic variational inference, this provides a methodology scaling to\nlarge datasets. However, for this methodology to be practically efficient, it is neces-\nsary to obtain low-variance unbiased estimators of the ELBO and its gradients with\nrespect to the parameters of interest. While the use of Markov chain Monte Carlo\n(MCMC) techniques such as Hamiltonian Monte Carlo (HMC) has been previously\nsuggested to achieve this [23, 26], the proposed methods require specifying reverse\nkernels which have a large impact on performance. Additionally, the resulting\nunbiased estimator of the ELBO for most MCMC kernels is typically not amenable\nto the reparameterization trick. We show here how to optimally select reverse\nkernels in this setting and, by building upon Hamiltonian Importance Sampling\n(HIS) [17], we obtain a scheme that provides low-variance unbiased estimators of\nthe ELBO and its gradients using the reparameterization trick. This allows us to\ndevelop a Hamiltonian Variational Auto-Encoder (HVAE). This method can be\nre-interpreted as a target-informed normalizing flow [20] which, within our context,\nonly requires a few evaluations of the gradient of the sampled likelihood and trivial\nJacobian calculations at each iteration.", "bibtex": "@inproceedings{NEURIPS2018_3202111c,\n author = {Caterini, Anthony L and Doucet, Arnaud and Sejdinovic, Dino},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hamiltonian Variational Auto-Encoder},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3202111cf90e7c816a472aaceb72b0df-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3202111cf90e7c816a472aaceb72b0df-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3202111cf90e7c816a472aaceb72b0df-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3202111cf90e7c816a472aaceb72b0df-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3202111cf90e7c816a472aaceb72b0df-Reviews.html", "metareview": "", "pdf_size": 468588, "gs_citation": 118, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13199503496722173919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Statistics, University of Oxford + Alan Turing Institute for Data Science; Department of Statistics, University of Oxford + Alan Turing Institute for Data Science; Department of Statistics, University of Oxford + Alan Turing Institute for Data Science", "aff_domain": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "email": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3202111cf90e7c816a472aaceb72b0df-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "University of Oxford;Alan Turing Institute", "aff_unique_dep": "Department of Statistics;Data Science", "aff_unique_url": "https://www.ox.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;ATI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Hardware Conditioned Policies for Multi-Robot Transfer Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11889", "id": "11889", "author_site": "Tao Chen, Adithyavairavan Murali, Abhinav Gupta", "author": "Tao Chen; Adithyavairavan Murali; Abhinav Gupta", "abstract": "Deep reinforcement learning could be used to learn dexterous robotic policies but it is challenging to transfer them to new robots with vastly different hardware properties. It is also prohibitively expensive to learn a new policy from scratch for each robot hardware due to the high sample complexity of modern state-of-the-art algorithms. We propose a novel approach called Hardware Conditioned Policies where we train a universal policy conditioned on a vector representation of robot hardware. We considered robots in simulation with varied dynamics, kinematic structure, kinematic lengths and degrees-of-freedom. First, we use the kinematic structure directly as the hardware encoding and show great zero-shot transfer to completely novel robots not seen during training. For robots with lower zero-shot success rate, we also demonstrate that fine-tuning the policy network is significantly more sample-efficient than training a model from scratch. In tasks where knowing the agent dynamics is important for success, we learn an embedding for robot hardware and show that policies conditioned on the encoding of hardware tend to generalize and transfer well. 
Videos of experiments are available at: https://sites.google.com/view/robot-transfer-hcp.", "bibtex": "@inproceedings{NEURIPS2018_b8cfbf77,\n author = {Chen, Tao and Murali, Adithyavairavan and Gupta, Abhinav},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hardware Conditioned Policies for Multi-Robot Transfer Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b8cfbf77a3d250a4523ba67a65a7d031-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b8cfbf77a3d250a4523ba67a65a7d031-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b8cfbf77a3d250a4523ba67a65a7d031-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b8cfbf77a3d250a4523ba67a65a7d031-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b8cfbf77a3d250a4523ba67a65a7d031-Reviews.html", "metareview": "", "pdf_size": 1360387, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11432360308578824406&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The Robotics Institute, Carnegie Mellon University; The Robotics Institute, Carnegie Mellon University; The Robotics Institute, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "https://sites.google.com/view/robot-transfer-hcp", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b8cfbf77a3d250a4523ba67a65a7d031-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "The Robotics Institute", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Hessian-based Analysis of Large Batch Training and Robustness to Adversaries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11485", "id": "11485", "author_site": "Zhewei Yao, Amir Gholami, Qi Lei, Kurt Keutzer, Michael Mahoney", "author": "Zhewei Yao; Amir Gholami; Qi Lei; Kurt Keutzer; Michael W. Mahoney", "abstract": "Large batch size training of Neural Networks has been shown to incur accuracy\nloss when trained with the current methods. The exact underlying reasons for\nthis are still not completely understood. Here, we study large batch size\ntraining through the lens of the Hessian operator and robust optimization. In\nparticular, we perform a Hessian based study to analyze exactly how the landscape of the loss function changes when training with large batch size. We compute the true Hessian spectrum, without approximation, by back-propagating the second\nderivative. Extensive experiments on multiple networks show that saddle-points are\nnot the cause of the generalization gap of large batch size training, and the results\nconsistently show that large batch size training converges to points with noticeably higher Hessian spectrum. Furthermore, we show that robust training allows one to favor flat areas, as points with large Hessian spectrum show poor robustness to adversarial perturbation. 
We further study this relationship, and provide empirical and theoretical proof that the inner loop for robust training is a saddle-free optimization problem \\textit{almost everywhere}. We present detailed experiments with five different network architectures, including a residual network, tested on MNIST, CIFAR-10/100 datasets.", "bibtex": "@inproceedings{NEURIPS2018_102f0bb6,\n author = {Yao, Zhewei and Gholami, Amir and Lei, Qi and Keutzer, Kurt and Mahoney, Michael W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hessian-based Analysis of Large Batch Training and Robustness to Adversaries},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/102f0bb6efb3a6128a3c750dd16729be-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/102f0bb6efb3a6128a3c750dd16729be-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/102f0bb6efb3a6128a3c750dd16729be-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/102f0bb6efb3a6128a3c750dd16729be-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/102f0bb6efb3a6128a3c750dd16729be-Reviews.html", "metareview": "", "pdf_size": 1077185, "gs_citation": 195, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4488699145655690539&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of California at Berkeley; University of California at Berkeley; University of Texas at Austin; University of California at Berkeley; University of California at Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;ices.utexas.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;ices.utexas.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/102f0bb6efb3a6128a3c750dd16729be-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.utexas.edu", "aff_unique_abbr": "UC Berkeley;UT Austin", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Berkeley;Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Heterogeneous Bitwidth Binarization in Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11398", "id": "11398", "author_site": "Joshua Fromm, Shwetak Patel, Matthai Philipose", "author": "Joshua Fromm; Shwetak Patel; Matthai Philipose", "abstract": "Recent work has shown that fast, compact low-bitwidth neural networks can\nbe surprisingly accurate. These networks use homogeneous binarization: all\nparameters in each layer or (more commonly) the whole model have the same low\nbitwidth (e.g., 2 bits). However, modern hardware allows efficient designs where\neach arithmetic instruction can have a custom bitwidth, motivating heterogeneous\nbinarization, where every parameter in the network may have a different bitwidth.\nIn this paper, we show that it is feasible and useful to select bitwidths at the\nparameter granularity during training. 
For instance, a heterogeneously quantized\nversion of modern networks such as AlexNet and MobileNet, with the right mix\nof 1-, 2- and 3-bit parameters that average to just 1.4 bits, can equal the accuracy\nof homogeneous 2-bit versions of these networks. Further, we provide analyses\nto show that the heterogeneously binarized systems yield FPGA- and ASIC-based\nimplementations that are correspondingly more efficient in both circuit area and\nenergy efficiency than their homogeneous counterparts.", "bibtex": "@inproceedings{NEURIPS2018_1b36ea1c,\n author = {Fromm, Joshua and Patel, Shwetak and Philipose, Matthai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Heterogeneous Bitwidth Binarization in Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1b36ea1c9b7a1c3ad668b8bb5df7963f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1b36ea1c9b7a1c3ad668b8bb5df7963f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1b36ea1c9b7a1c3ad668b8bb5df7963f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1b36ea1c9b7a1c3ad668b8bb5df7963f-Reviews.html", "metareview": "", "pdf_size": 852865, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1091815518844737300&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Electrical Engineering, University of Washington; Department of Computer Science, University of Washington; Microsoft Research", "aff_domain": "uw.edu;cs.washington.edu;microsoft.com", "email": "uw.edu;cs.washington.edu;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1b36ea1c9b7a1c3ad668b8bb5df7963f-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Washington;Microsoft", "aff_unique_dep": "Department of Electrical Engineering;Microsoft Research", "aff_unique_url": "https://www.washington.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UW;MSR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Heterogeneous Multi-output Gaussian Process Prediction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11648", "id": "11648", "author_site": "Pablo Moreno-Mu\u00f1oz, Antonio Art\u00e9s, Mauricio \u00c1lvarez", "author": "Pablo Moreno-Mu\u00f1oz; Antonio Art\u00e9s; Mauricio \u00c1lvarez", "abstract": "We present a novel extension of multi-output Gaussian processes for handling heterogeneous outputs. We assume that each output has its own likelihood function and use a vector-valued Gaussian process prior to jointly model the parameters in all likelihoods as latent functions. Our multi-output Gaussian process uses a covariance function with a linear model of coregionalisation form. Assuming conditional independence across the underlying latent functions together with an inducing variable framework, we are able to obtain tractable variational bounds amenable to stochastic variational inference. 
We illustrate the performance of the model on synthetic data and two real datasets: a human behavioral study and a demographic high-dimensional dataset.", "bibtex": "@inproceedings{NEURIPS2018_165a59f7,\n author = {Moreno-Mu\\~{n}oz, Pablo and Art\\'{e}s, Antonio and \\'{A}lvarez, Mauricio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Heterogeneous Multi-output Gaussian Process Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/165a59f7cf3b5c4396ba65953d679f17-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/165a59f7cf3b5c4396ba65953d679f17-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/165a59f7cf3b5c4396ba65953d679f17-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/165a59f7cf3b5c4396ba65953d679f17-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/165a59f7cf3b5c4396ba65953d679f17-Reviews.html", "metareview": "", "pdf_size": 1450751, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16326528698943863964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Dept. of Signal Theory and Communications, Universidad Carlos III de Madrid, Spain; Dept. of Signal Theory and Communications, Universidad Carlos III de Madrid, Spain; Dept. of Computer Science, University of Sheffield, UK", "aff_domain": "tsc.uc3m.es;tsc.uc3m.es;sheffield.ac.uk", "email": "tsc.uc3m.es;tsc.uc3m.es;sheffield.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/165a59f7cf3b5c4396ba65953d679f17-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Universidad Carlos III de Madrid;University of Sheffield", "aff_unique_dep": "Dept. of Signal Theory and Communications;Dept. of Computer Science", "aff_unique_url": "https://www.uc3m.es;https://www.sheffield.ac.uk", "aff_unique_abbr": "UC3M;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Spain;United Kingdom" }, { "title": "Hierarchical Graph Representation Learning with Differentiable Pooling", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11471", "id": "11471", "author_site": "Zhitao Ying, Jiaxuan You, Christopher Morris, Xiang Ren, Will Hamilton, Jure Leskovec", "author": "Zhitao Ying; Jiaxuan You; Christopher Morris; Xiang Ren; Will Hamilton; Jure Leskovec", "abstract": "Recently, graph neural networks (GNNs) have revolutionized the field of graph representation learning through effectively learned node embeddings, and achieved state-of-the-art results in tasks such as node classification and link prediction. However, current GNN methods are inherently flat and do not learn hierarchical representations of graphs---a limitation that is especially problematic for the task of graph classification, where the goal is to predict the label associated with an entire graph. Here we propose DiffPool, a differentiable graph pooling module that can generate hierarchical representations of graphs and can be combined with various graph neural network architectures in an end-to-end fashion. 
DiffPool learns a differentiable soft cluster assignment for nodes at each layer of a deep GNN, mapping nodes to a set of clusters, which then form the coarsened input for the next GNN layer. Our experimental results show that combining existing GNN methods with DiffPool yields an average improvement of 5-10% accuracy on graph classification benchmarks, compared to all existing pooling approaches, achieving a new state-of-the-art on four out of five benchmark datasets.", "bibtex": "@inproceedings{NEURIPS2018_e77dbaf6,\n author = {Ying, Zhitao and You, Jiaxuan and Morris, Christopher and Ren, Xiang and Hamilton, Will and Leskovec, Jure},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Graph Representation Learning with Differentiable Pooling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e77dbaf6759253c7c6d0efc5690369c7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e77dbaf6759253c7c6d0efc5690369c7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e77dbaf6759253c7c6d0efc5690369c7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e77dbaf6759253c7c6d0efc5690369c7-Reviews.html", "metareview": "", "pdf_size": 2425041, "gs_citation": 2077, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14312214754036446285&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "Stanford University; Stanford University; TU Dortmund University; University of Southern California; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;udo.edu;usc.edu;stanford.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;udo.edu;usc.edu;stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e77dbaf6759253c7c6d0efc5690369c7-Abstract.html", "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Stanford University;Technische Universit\u00e4t Dortmund;University of Southern California", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.tu-dortmund.de;https://www.usc.edu", "aff_unique_abbr": "Stanford;TU Dortmund;USC", "aff_campus_unique_index": "0;0;1;2;0;0", "aff_campus_unique": "Stanford;Dortmund;Los Angeles", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Hierarchical Reinforcement Learning for Zero-shot Generalization with Subtask Dependencies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11689", "id": "11689", "author_site": "Sungryull Sohn, Junhyuk Oh, Honglak Lee", "author": "Sungryull Sohn; Junhyuk Oh; Honglak Lee", "abstract": "We introduce a new RL problem where the agent is required to generalize to a previously-unseen environment characterized by a subtask graph which describes a set of subtasks and their dependencies. Unlike existing hierarchical multitask RL approaches that explicitly describe what the agent should do at a high level, our problem only describes properties of subtasks and relationships among them, which requires the agent to perform complex reasoning to find the optimal subtask to execute. 
To solve this problem, we propose a neural subtask graph solver (NSGS) which encodes the subtask graph using a recursive neural network embedding. To overcome the difficulty of training, we propose a novel non-parametric gradient-based policy, graph reward propagation, to pre-train our NSGS agent and further fine-tune it through an actor-critic method. The experimental results on two 2D visual domains show that our agent can perform complex reasoning to find a near-optimal way of executing the subtask graph and generalize well to unseen subtask graphs. In addition, we compare our agent with a Monte-Carlo tree search (MCTS) method, showing that our method is much more efficient than MCTS, and the performance of NSGS can be further improved by combining it with MCTS.", "bibtex": "@inproceedings{NEURIPS2018_018dd1e0,\n author = {Sohn, Sungryull and Oh, Junhyuk and Lee, Honglak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Reinforcement Learning for Zero-shot Generalization with Subtask Dependencies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/018dd1e07a2de4a08e6612341bf2323e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/018dd1e07a2de4a08e6612341bf2323e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/018dd1e07a2de4a08e6612341bf2323e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/018dd1e07a2de4a08e6612341bf2323e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/018dd1e07a2de4a08e6612341bf2323e-Reviews.html", "metareview": "", "pdf_size": 1287501, "gs_citation": 104, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15468349230439204109&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Michigan; University of Michigan + DeepMind; Google Brain + University of Michigan", "aff_domain": "umich.edu;google.com;google.com", "email": "umich.edu;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/018dd1e07a2de4a08e6612341bf2323e-Abstract.html", "aff_unique_index": "0;0+1;2+0", "aff_unique_norm": "University of Michigan;DeepMind;Google", "aff_unique_dep": ";;Google Brain", "aff_unique_url": "https://www.umich.edu;https://deepmind.com;https://brain.google.com", "aff_unique_abbr": "UM;DeepMind;Google Brain", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0+1;0+0", "aff_country_unique": "United States;United Kingdom" }, { "title": "High Dimensional Linear Regression using Lattice Basis Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11197", "id": "11197", "author_site": "Ilias Zadik, David Gamarnik", "author": "Ilias Zadik; David Gamarnik", "abstract": "We consider a high dimensional linear regression problem where the goal is to efficiently recover an unknown vector \\beta^* from n noisy linear observations Y=X \\beta^* + W.", "bibtex": "@inproceedings{NEURIPS2018_ccc0aa1b,\n author = {Zadik, Ilias and Gamarnik, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {High Dimensional Linear Regression using Lattice Basis Reduction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ccc0aa1b81bf81e16c676ddb977c5881-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ccc0aa1b81bf81e16c676ddb977c5881-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ccc0aa1b81bf81e16c676ddb977c5881-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ccc0aa1b81bf81e16c676ddb977c5881-Reviews.html", "metareview": "", "pdf_size": 714799, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5139469989093778943&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Sloan School of Management, Massachusetts Institute of Technology, Cambridge, MA 02139; Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA 02139", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ccc0aa1b81bf81e16c676ddb977c5881-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Sloan School of Management", "aff_unique_url": "https://mitsloan.mit.edu/", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "HitNet: Hybrid Ternary Recurrent Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11083", "id": "11083", "author_site": "Peiqi Wang, Xinfeng Xie, Lei Deng, Guoqi Li, Dongsheng Wang, Yuan Xie", "author": "Peiqi Wang; Xinfeng Xie; Lei Deng; Guoqi Li; Dongsheng Wang; Yuan Xie", "abstract": "Quantization is a promising technique to reduce the model size, memory footprint, and massive computation operations of recurrent neural networks (RNNs) for embedded devices with limited resources. Although extreme low-bit quantization has achieved impressive success on convolutional neural networks, it still suffers from huge accuracy degradation on RNNs with the same low-bit precision. In this paper, we first investigate the accuracy degradation of RNN models under different quantization schemes, and the distribution of tensor values in the full precision model. Our observation reveals that due to the difference between the distributions of weights and activations, different quantization methods are suitable for different parts of models. Based on our observation, we propose HitNet, a hybrid ternary recurrent neural network, which bridges the accuracy gap between the full precision model and the quantized model. In HitNet, we develop a hybrid quantization method to quantize weights and activations. Moreover, we introduce a sloping factor, motivated by prior work on Boltzmann machines, to the activation functions, further closing the accuracy gap between the full precision model and the quantized model. Overall, our HitNet can quantize RNN models into ternary values, {-1, 0, 1}, outperforming the state-of-the-art quantization methods on RNN models significantly. We test it on typical RNN models, such as Long Short-Term Memory (LSTM) and Gated Recurrent Units (GRU), on which the results outperform previous work significantly. 
For example, we improve the perplexity per word (PPW) of a ternary LSTM on the Penn Tree Bank (PTB) corpus from 126 (the state-of-the-art result, to the best of our knowledge) to 110.3, where the corresponding full precision model achieves 97.2; likewise, we improve a ternary GRU from 142 to 113.5, where the full precision model achieves 102.7.", "bibtex": "@inproceedings{NEURIPS2018_82cec960,\n author = {Wang, Peiqi and Xie, Xinfeng and Deng, Lei and Li, Guoqi and Wang, Dongsheng and Xie, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {HitNet: Hybrid Ternary Recurrent Neural Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/82cec96096d4281b7c95cd7e74623496-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/82cec96096d4281b7c95cd7e74623496-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/82cec96096d4281b7c95cd7e74623496-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/82cec96096d4281b7c95cd7e74623496-Reviews.html", "metareview": "", "pdf_size": 538752, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=539524031929870697&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science and Technology, Tsinghua University + Beijing National Research Center for Information Science and Technology; Department of Electrical and Computer Engineering, University of California, Santa Barbara; Department of Electrical and Computer Engineering, University of California, Santa Barbara; Department of Precision Instrument, Tsinghua University; Department of Computer Science and Technology, Tsinghua University + Beijing National Research Center for Information Science and Technology; Department of Electrical and Computer Engineering, University of California, Santa Barbara", "aff_domain": "mails.tsinghua.edu.cn;ucsb.edu;ucsb.edu;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;ucsb.edu", "email": "mails.tsinghua.edu.cn;ucsb.edu;ucsb.edu;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn;ucsb.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/82cec96096d4281b7c95cd7e74623496-Abstract.html", "aff_unique_index": "0+1;2;2;0;0+1;2", "aff_unique_norm": "Tsinghua University;Beijing National Research Center for Information Science and Technology;University of California, Santa Barbara", "aff_unique_dep": "Department of Computer Science and Technology;;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.ucsb.edu", "aff_unique_abbr": "THU;;UCSB", "aff_campus_unique_index": ";1;1;;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0+0;1;1;0;0+0;1", "aff_country_unique": "China;United States" }, { "title": "Horizon-Independent Minimax Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11514", "id": "11514", "author_site": "Alan Malek, Peter Bartlett", "author": "Alan Malek; Peter L Bartlett", "abstract": "We consider online linear regression: at each round, an adversary reveals a covariate vector, the learner predicts a real value, the adversary reveals a label, and the learner suffers the squared prediction error. 
The aim is to minimize the difference between the cumulative loss and that of the linear predictor that is best in hindsight. Previous work demonstrated that the minimax optimal strategy is easy to compute recursively from the end of the game; this requires the entire sequence of covariate vectors in advance. We show that, once provided with a measure of the scale of the problem, we can invert the recursion and play the minimax strategy without knowing the future covariates. Further, we show that this forward recursion remains optimal even against adaptively chosen labels and covariates, provided that the adversary adheres to a set of constraints that prevent misrepresentation of the scale of the problem. This strategy is horizon-independent in that the regret and minimax strategies depend on the size of the constraint set and not on the time-horizon, and hence it incurs no more regret than the optimal strategy that knows in advance the number of rounds of the game. We also provide an interpretation of the minimax algorithm as a follow-the-regularized-leader strategy with a data-dependent regularizer and obtain an explicit expression for the minimax regret.", "bibtex": "@inproceedings{NEURIPS2018_84b20b1f,\n author = {Malek, Alan and Bartlett, Peter L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Horizon-Independent Minimax Linear Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/84b20b1f5a0d103f5710bb67a043cd78-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/84b20b1f5a0d103f5710bb67a043cd78-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/84b20b1f5a0d103f5710bb67a043cd78-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/84b20b1f5a0d103f5710bb67a043cd78-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/84b20b1f5a0d103f5710bb67a043cd78-Reviews.html", "metareview": "", "pdf_size": 332273, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1869048162833509165&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Laboratory for Information and Decision Systems, Massachusetts Institute of Technology; Department of EECS and Statistics, University of California", "aff_domain": "mit.edu;cs.berkeley.edu", "email": "mit.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/84b20b1f5a0d103f5710bb67a043cd78-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;University of California", "aff_unique_dep": "Laboratory for Information and Decision Systems;Department of EECS and Statistics", "aff_unique_url": "https://web.mit.edu;https://www.universityofcalifornia.edu", "aff_unique_abbr": "MIT;UC", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How Does Batch Normalization Help Optimization?", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11257", "id": "11257", "author_site": "Shibani Santurkar, Dimitris Tsipras, Andrew Ilyas, Aleksander Madry", "author": "Shibani Santurkar; Dimitris Tsipras; Andrew Ilyas; Aleksander Madry", "abstract": "Batch 
Normalization (BatchNorm) is a widely adopted technique that enables faster and more stable training of deep neural networks (DNNs).\nDespite its pervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly understood.\nThe popular belief is that this effectiveness stems from controlling the change of the layers' input distributions during training to reduce the so-called \"internal covariate shift\".\nIn this work, we demonstrate that such distributional stability of layer inputs has little to do with the success of BatchNorm.\nInstead, we uncover a more fundamental impact of BatchNorm on the training process: it makes the optimization landscape significantly smoother.\nThis smoothness induces a more predictive and stable behavior of the gradients, allowing for faster training.", "bibtex": "@inproceedings{NEURIPS2018_905056c1,\n author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How Does Batch Normalization Help Optimization?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/905056c1ac1dad141560467e0a99e1cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/905056c1ac1dad141560467e0a99e1cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/905056c1ac1dad141560467e0a99e1cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/905056c1ac1dad141560467e0a99e1cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/905056c1ac1dad141560467e0a99e1cf-Reviews.html", "metareview": "", "pdf_size": 1770504, "gs_citation": 2422, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9444562420324395093&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "MIT; MIT; MIT; MIT", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/905056c1ac1dad141560467e0a99e1cf-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "How Many Samples are Needed to Estimate a Convolutional Neural Network?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11062", "id": "11062", "author_site": "Simon Du, Yining Wang, Xiyu Zhai, Sivaraman Balakrishnan, Russ Salakhutdinov, Aarti Singh", "author": "Simon S Du; Yining Wang; Xiyu Zhai; Sivaraman Balakrishnan; Ruslan Salakhutdinov; Aarti Singh", "abstract": "A widespread folklore for explaining the success of Convolutional Neural Networks (CNNs) is that CNNs use a more compact representation than the Fully-connected Neural Network (FNN) and thus require fewer training samples to accurately estimate their parameters. We initiate the study of rigorously characterizing the sample complexity of estimating CNNs. 
We show that for an $m$-dimensional convolutional filter with linear activation acting on a $d$-dimensional input, the sample complexity of achieving population prediction error of $\\epsilon$ is $\\widetilde{O}(m/\\epsilon^2)$, whereas the sample complexity for its FNN counterpart is lower bounded by $\\Omega(d/\\epsilon^2)$ samples. Since $m \\ll d$ in typical settings, this result demonstrates the advantage of using a CNN. We further consider the sample complexity of estimating a one-hidden-layer CNN with linear activation where both the $m$-dimensional convolutional filter and the $r$-dimensional output weights are unknown. For this model, we show that the sample complexity is $\\widetilde{O}\\left((m+r)/\\epsilon^2\\right)$ when the ratio between the stride size and the filter size is a constant. For both models, we also present lower bounds showing our sample complexities are tight up to logarithmic factors. Our main tools for deriving these results are a localized empirical process analysis and a new lemma characterizing the convolutional structure. We believe that these tools may inspire further developments in understanding CNNs.", "bibtex": "@inproceedings{NEURIPS2018_03c6b069,\n author = {Du, Simon S and Wang, Yining and Zhai, Xiyu and Balakrishnan, Sivaraman and Salakhutdinov, Russ R and Singh, Aarti},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How Many Samples are Needed to Estimate a Convolutional Neural Network?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/03c6b06952c750899bb03d998e631860-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/03c6b06952c750899bb03d998e631860-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/03c6b06952c750899bb03d998e631860-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/03c6b06952c750899bb03d998e631860-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/03c6b06952c750899bb03d998e631860-Reviews.html", "metareview": "", "pdf_size": 1867333, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7841336768178005333&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/03c6b06952c750899bb03d998e631860-Abstract.html" }, { "title": "How Much Restricted Isometry is Needed In Nonconvex Matrix Recovery?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11545", "id": "11545", "author_site": "Richard Zhang, Cedric Josz, Somayeh Sojoudi, Javad Lavaei", "author": "Richard Zhang; Cedric Josz; Somayeh Sojoudi; Javad Lavaei", "abstract": "When the linear measurements of an instance of low-rank matrix recovery\nsatisfy a restricted isometry property (RIP) --- i.e. they\nare approximately norm-preserving --- the problem is known\nto contain no spurious local minima, so exact recovery is guaranteed.\nIn this paper, we show that moderate RIP is not enough to eliminate\nspurious local minima, so existing results can only hold for near-perfect\nRIP. In fact, counterexamples are ubiquitous: every $x$ is the spurious\nlocal minimum of a rank-1 instance of matrix recovery that satisfies\nRIP. 
One specific counterexample has RIP constant $\\delta=1/2$, but\ncauses randomly initialized stochastic gradient descent (SGD) to fail\n12\\% of the time. SGD is frequently able to avoid and escape spurious\nlocal minima, but this empirical result shows that it can occasionally\nbe defeated by their existence. Hence, while exact recovery guarantees\nwill likely require a proof of no spurious local minima, arguments\nbased solely on norm preservation will only be applicable to a narrow\nset of nearly-isotropic instances.", "bibtex": "@inproceedings{NEURIPS2018_f8da71e5,\n author = {Zhang, Richard and Josz, Cedric and Sojoudi, Somayeh and Lavaei, Javad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How Much Restricted Isometry is Needed In Nonconvex Matrix Recovery?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f8da71e562ff44a2bc7edf3578c593da-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f8da71e562ff44a2bc7edf3578c593da-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f8da71e562ff44a2bc7edf3578c593da-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f8da71e562ff44a2bc7edf3578c593da-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f8da71e562ff44a2bc7edf3578c593da-Reviews.html", "metareview": "", "pdf_size": 3171248, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18222082844441671988&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "alum.mit.edu;gmail.com;berkeley.edu;berkeley.edu", "email": "alum.mit.edu;gmail.com;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f8da71e562ff44a2bc7edf3578c593da-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "How SGD Selects the Global Minima in Over-parameterized Learning: A Dynamical Stability Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11792", "id": "11792", "author_site": "Lei Wu, Chao Ma, Weinan E", "author": "Lei Wu; Chao Ma; Weinan E", "abstract": "The question of which global minima are accessible by a stochastic gradient descent (SGD) algorithm with a specific learning rate and batch size is studied from the perspective of dynamical stability. The concept of non-uniformity is introduced, which, together with sharpness, characterizes the stability property of a global minimum and hence the accessibility of a particular SGD algorithm to that global minimum. In particular, this analysis shows that learning rate and batch size play different roles in minima selection. 
Extensive empirical results seem to correlate well with the theoretical findings and provide further support to these claims.", "bibtex": "@inproceedings{NEURIPS2018_6651526b,\n author = {Wu, Lei and Ma, Chao and E, Weinan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How SGD Selects the Global Minima in Over-parameterized Learning: A Dynamical Stability Perspective},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6651526b6fb8f29a00507de6a49ce30f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6651526b6fb8f29a00507de6a49ce30f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6651526b6fb8f29a00507de6a49ce30f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6651526b6fb8f29a00507de6a49ce30f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6651526b6fb8f29a00507de6a49ce30f-Reviews.html", "metareview": "", "pdf_size": 2627231, "gs_citation": 259, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1980119340021099329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Mathematical Sciences, Peking University; Program in Applied and Computational Mathematics, Princeton University; Department of Mathematics and Program in Applied and Computational Mathematics, Princeton University + Beijing Institute of Big Data Research", "aff_domain": "pku.edu.cn;princeton.edu;math.princeton.edu", "email": "pku.edu.cn;princeton.edu;math.princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6651526b6fb8f29a00507de6a49ce30f-Abstract.html", "aff_unique_index": "0;1;1+2", "aff_unique_norm": "Peking University;Princeton University;Beijing Institute of Big Data Research", "aff_unique_dep": "School of Mathematical Sciences;Program in Applied and Computational Mathematics;", "aff_unique_url": "http://www.pku.edu.cn;https://www.princeton.edu;", "aff_unique_abbr": "PKU;Princeton;", "aff_campus_unique_index": "0;", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;1+0", "aff_country_unique": "China;United States" }, { "title": "How To Make the Gradients Small Stochastically: Even Faster Convex and Nonconvex SGD", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11134", "id": "11134", "author": "Zeyuan Allen-Zhu", "abstract": "Stochastic gradient descent (SGD) gives an optimal convergence rate when minimizing convex stochastic objectives $f(x)$. However, in terms of making the gradients small, the original SGD does not give an optimal rate, even when $f(x)$ is convex.\n\nIf $f(x)$ is convex, to find a point with gradient norm $\\varepsilon$, we design an algorithm SGD3 with a near-optimal rate $\\tilde{O}(\\varepsilon^{-2})$, improving the best known rate $O(\\varepsilon^{-8/3})$. If $f(x)$ is nonconvex, to find its $\\varepsilon$-approximate local minimum, we design an algorithm SGD5 with rate $\\tilde{O}(\\varepsilon^{-3.5})$, where previously SGD variants only achieve $\\tilde{O}(\\varepsilon^{-4})$. 
This is no slower than the best known stochastic version of Newton's method in all parameter regimes.", "bibtex": "@inproceedings{NEURIPS2018_996a7fa0,\n author = {Allen-Zhu, Zeyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How To Make the Gradients Small Stochastically: Even Faster Convex and Nonconvex SGD},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/996a7fa078cc36c46d02f9af3bef918b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/996a7fa078cc36c46d02f9af3bef918b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/996a7fa078cc36c46d02f9af3bef918b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/996a7fa078cc36c46d02f9af3bef918b-Reviews.html", "metareview": "", "pdf_size": 463076, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13518566403153992850&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Microsoft Research AI", "aff_domain": "csail.mit.edu", "email": "csail.mit.edu", "github": "", "project": "https://arxiv.org/abs/1801.02982", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/996a7fa078cc36c46d02f9af3bef918b-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "AI", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "How to Start Training: The Effect of Initialization and Architecture", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11080", "id": "11080", "author_site": "Boris Hanin, David Rolnick", "author": "Boris Hanin; David Rolnick", "abstract": "We identify and study two common failure modes for early training in deep ReLU nets. For each, we give a rigorous proof of when it occurs and how to avoid it, for fully connected, convolutional, and residual architectures. We show that the first failure mode, exploding or vanishing mean activation length, can be avoided by initializing weights from a symmetric distribution with variance 2/fan-in and, for ResNets, by correctly scaling the residual modules. We prove that the second failure mode, exponentially large variance of activation length, never occurs in residual nets once the first failure mode is avoided. In contrast, for fully connected nets, we prove that this failure mode can happen and is avoided by keeping constant the sum of the reciprocals of layer widths. We demonstrate empirically the effectiveness of our theoretical results in predicting when networks are able to start training. In particular, we note that many popular initializations fail our criteria, whereas correct initialization and architecture allows much deeper networks to be trained.", "bibtex": "@inproceedings{NEURIPS2018_d81f9c1b,\n author = {Hanin, Boris and Rolnick, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How to Start Training: The Effect of Initialization and Architecture},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d81f9c1be2e08964bf9f24b15f0e4900-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d81f9c1be2e08964bf9f24b15f0e4900-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d81f9c1be2e08964bf9f24b15f0e4900-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d81f9c1be2e08964bf9f24b15f0e4900-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d81f9c1be2e08964bf9f24b15f0e4900-Reviews.html", "metareview": "", "pdf_size": 2164922, "gs_citation": 322, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9627141893890263732&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Mathematics, Texas A& M University; Department of Mathematics, Massachusetts Institute of Technology", "aff_domain": "math.tamu.edu;mit.edu", "email": "math.tamu.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d81f9c1be2e08964bf9f24b15f0e4900-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;Massachusetts Institute of Technology", "aff_unique_dep": "Department of Mathematics;Department of Mathematics", "aff_unique_url": "https://www.tamu.edu;https://web.mit.edu", "aff_unique_abbr": "TAMU;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "How to tell when a clustering is (approximately) correct using convex relaxations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11713", "id": "11713", "author": "Marina Meila", "abstract": "We introduce the Sublevel Set (SS) method, a generic method to obtain sufficient guarantees of near-optimality and uniqueness (up to small perturbations) for a clustering. This method can be instantiated for a variety of clustering loss functions for which convex relaxations exist. Obtaining the guarantees in practice amounts to solving a convex optimization. We demonstrate the applicability of this method by obtaining distribution free guarantees for K-means clustering on realistic data sets.", "bibtex": "@inproceedings{NEURIPS2018_882735cb,\n author = {Meila, Marina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How to tell when a clustering is (approximately) correct using convex relaxations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/882735cbdfd9f810814d17892ae50023-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/882735cbdfd9f810814d17892ae50023-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/882735cbdfd9f810814d17892ae50023-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/882735cbdfd9f810814d17892ae50023-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/882735cbdfd9f810814d17892ae50023-Reviews.html", "metareview": "", "pdf_size": 830450, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7084099883569748387&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Washington", "aff_domain": "stat.washington.edu", "email": "stat.washington.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/882735cbdfd9f810814d17892ae50023-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Human-in-the-Loop Interpretability Prior", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11963", "id": "11963", "author_site": "Isaac Lage, Andrew Ross, Samuel J Gershman, Been Kim, Finale Doshi-Velez", "author": "Isaac Lage; Andrew Ross; Samuel J Gershman; Been Kim; Finale Doshi-Velez", "abstract": "We often desire our models to be interpretable as well as accurate. Prior work on optimizing models for interpretability has relied on easy-to-quantify proxies for interpretability, such as sparsity or the number of operations required. In this work, we optimize for interpretability by directly including humans in the optimization loop. We develop an algorithm that minimizes the number of user studies to find models that are both predictive and interpretable and demonstrate our approach on several data sets. Our human subjects results show trends towards different proxy notions of interpretability on different datasets, which suggests that different proxies are preferred on different tasks.", "bibtex": "@inproceedings{NEURIPS2018_0a7d83f0,\n author = {Lage, Isaac and Ross, Andrew and Gershman, Samuel J and Kim, Been and Doshi-Velez, Finale},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Human-in-the-Loop Interpretability Prior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0a7d83f084ec258aefd128569dda03d7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0a7d83f084ec258aefd128569dda03d7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0a7d83f084ec258aefd128569dda03d7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0a7d83f084ec258aefd128569dda03d7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0a7d83f084ec258aefd128569dda03d7-Reviews.html", "metareview": "", "pdf_size": 442548, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6925363852455924380&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Department of Computer Science, Harvard University; Department of Computer Science, Harvard University; Google Brain; Department of Psychology, Harvard University; Department of Computer Science, Harvard University", "aff_domain": "g.harvard.edu;g.harvard.edu;google.com;fas.harvard.edu;seas.harvard.edu", "email": "g.harvard.edu;g.harvard.edu;google.com;fas.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0a7d83f084ec258aefd128569dda03d7-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Harvard University;Google", "aff_unique_dep": "Department of Computer Science;Google Brain", "aff_unique_url": "https://www.harvard.edu;https://brain.google.com", "aff_unique_abbr": "Harvard;Google Brain", "aff_campus_unique_index": "0;0;1;0;0", "aff_campus_unique": "Cambridge;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hunting for Discriminatory Proxies in Linear Regression Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11450", "id": "11450", "author_site": "Samuel Yeom, Anupam Datta, Matt Fredrikson", "author": "Samuel Yeom; Anupam Datta; Matt Fredrikson", "abstract": "A machine learning model may exhibit discrimination when used to make decisions involving people. One potential cause for such outcomes is that the model uses a statistical proxy for a protected demographic attribute. In this paper we formulate a definition of proxy use for the setting of linear regression and present algorithms for detecting proxies. Our definition follows recent work on proxies in classification models, and characterizes a model's constituent behavior that: 1) correlates closely with a protected random variable, and 2) is causally influential in the overall behavior of the model. We show that proxies in linear regression models can be efficiently identified by solving a second-order cone program, and further extend this result to account for situations where the use of a certain input variable is justified as a ``business necessity''. Finally, we present empirical results on two law enforcement datasets that exhibit varying degrees of racial disparity in prediction outcomes, demonstrating that proxies shed useful light on the causes of discriminatory behavior in models.", "bibtex": "@inproceedings{NEURIPS2018_6cd9313e,\n author = {Yeom, Samuel and Datta, Anupam and Fredrikson, Matt},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hunting for Discriminatory Proxies in Linear Regression Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6cd9313ed34ef58bad3fdd504355e72c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6cd9313ed34ef58bad3fdd504355e72c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6cd9313ed34ef58bad3fdd504355e72c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6cd9313ed34ef58bad3fdd504355e72c-Reviews.html", "metareview": "", "pdf_size": 305592, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17791730521792764075&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6cd9313ed34ef58bad3fdd504355e72c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Hybrid Knowledge Routed Modules for Large-scale Object Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11170", "id": "11170", "author_site": "ChenHan Jiang, Hang Xu, Xiaodan Liang, Liang Lin", "author": "ChenHan Jiang; Hang Xu; Xiaodan Liang; Liang Lin", "abstract": "The dominant object detection approaches treat the recognition of each region separately and overlook crucial semantic correlations between objects in one scene. This paradigm leads to a substantial performance drop when facing heavy long-tail problems, where very few samples are available for rare classes and plenty of confusing categories exist. We exploit diverse human commonsense knowledge for reasoning over large-scale object categories and reaching semantic coherency within one image. Particularly, we present Hybrid Knowledge Routed Modules (HKRM) that incorporate the reasoning routed by two kinds of knowledge forms: an explicit knowledge module for structured constraints that are summarized with linguistic knowledge (e.g. shared attributes, relationships) about concepts; and an implicit knowledge module that depicts some implicit constraints (e.g. common spatial layouts). By functioning over a region-to-region graph, both modules can be individualized and adapted to coordinate with visual patterns in each image, guided by specific knowledge forms. HKRM modules are light-weight, general-purpose and extensible, easily incorporating multiple forms of knowledge to endow any detection network with the ability of global semantic reasoning. Experiments on large-scale object detection benchmarks show HKRM obtains around 34.5% improvement on VisualGenome (1000 categories) and 30.4% on ADE in terms of mAP.", "bibtex": "@inproceedings{NEURIPS2018_72da7fd6,\n author = {Jiang, ChenHan and Xu, Hang and Liang, Xiaodan and Lin, Liang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hybrid Knowledge Routed Modules for Large-scale Object Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/72da7fd6d1302c0a159f6436d01e9eb0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/72da7fd6d1302c0a159f6436d01e9eb0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/72da7fd6d1302c0a159f6436d01e9eb0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/72da7fd6d1302c0a159f6436d01e9eb0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/72da7fd6d1302c0a159f6436d01e9eb0-Reviews.html", "metareview": "", "pdf_size": 4633279, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18227077982790889117&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Sun Yat-Sen University; Huawei Noah\u2019s Ark Lab; School of Intelligent Systems Engineering+Sun Yat-Sen University; Sun Yat-Sen University", "aff_domain": "gmail.com;live.com;gmail.com;ieee.org", "email": "gmail.com;live.com;gmail.com;ieee.org", "github": "https://github.com/chanyn/HKRM", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/72da7fd6d1302c0a159f6436d01e9eb0-Abstract.html", "aff_unique_index": "0;1;2+0;0", "aff_unique_norm": "Sun Yat-sen University;Huawei;School of Intelligent Systems Engineering", "aff_unique_dep": ";Noah\u2019s Ark Lab;Intelligent Systems Engineering", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.huawei.com;", "aff_unique_abbr": "SYSU;Huawei;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "title": "Hybrid Macro/Micro Level Backpropagation for Training Deep Spiking Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11675", "id": "11675", "author_site": "Yingyezhe Jin, Wenrui Zhang, Peng Li", "author": "Yingyezhe Jin; Wenrui Zhang; Peng Li", "abstract": "Spiking neural networks (SNNs) are positioned to enable spatio-temporal information processing and ultra-low power event-driven neuromorphic hardware. However, SNNs have yet to match the performance of conventional deep artificial neural networks (ANNs), a long-standing challenge due to complex dynamics and non-differentiable spike events encountered in training. The existing SNN error backpropagation (BP) methods are limited in terms of scalability, lack of proper handling of spiking discontinuities, and/or mismatch between the rate-coded loss function and computed gradient. We present a hybrid macro/micro level backpropagation (HM2-BP) algorithm for training multi-layer SNNs. The temporal effects are precisely captured by the proposed spike-train level post-synaptic potential (S-PSP) at the microscopic level. The rate-coded errors are defined at the macroscopic level, computed and back-propagated across both macroscopic and microscopic levels. Different from existing BP methods, HM2-BP directly computes the gradient of the rate-coded loss function w.r.t. tunable parameters. We evaluate the proposed HM2-BP algorithm by training deep fully connected and convolutional SNNs based on the static MNIST [14] and dynamic neuromorphic N-MNIST [26]. 
HM2-BP achieves an accuracy level of 99.49% and 98.88% for MNIST and N-MNIST, respectively, outperforming the best reported performances obtained from the existing SNN BP algorithms. Furthermore, HM2-BP produces the highest accuracies based on SNNs for the EMNIST [3] dataset, and leads to high recognition accuracy for the 16-speaker spoken English letters of TI46 Corpus [16], a challenging spatio-temporal speech recognition benchmark for which no prior success based on SNNs was reported. It also achieves competitive performances surpassing those of conventional deep learning models when dealing with asynchronous spiking streams.", "bibtex": "@inproceedings{NEURIPS2018_3fb04953,\n author = {Jin, Yingyezhe and Zhang, Wenrui and Li, Peng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hybrid Macro/Micro Level Backpropagation for Training Deep Spiking Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3fb04953d95a94367bb133f862402bce-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3fb04953d95a94367bb133f862402bce-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3fb04953d95a94367bb133f862402bce-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3fb04953d95a94367bb133f862402bce-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3fb04953d95a94367bb133f862402bce-Reviews.html", "metareview": "", "pdf_size": 1274850, "gs_citation": 247, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6794497534863732123&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3fb04953d95a94367bb133f862402bce-Abstract.html" }, { "title": "Hybrid Retrieval-Generation Reinforced Agent for Medical Image Report Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11168", "id": "11168", "author_site": "Yuan Li, Xiaodan Liang, Zhiting Hu, Eric Xing", "author": "Yuan Li; Xiaodan Liang; Zhiting Hu; Eric P Xing", "abstract": "Generating long and coherent reports to describe medical images poses challenges to bridging visual patterns with informative human linguistic descriptions. We propose a novel Hybrid Retrieval-Generation Reinforced Agent (HRGR-Agent) which reconciles traditional retrieval-based approaches populated with human prior knowledge, with modern learning-based approaches to achieve structured, robust, and diverse report generation. HRGR-Agent employs a hierarchical decision-making procedure. For each sentence, a high-level retrieval policy module chooses to either retrieve a template sentence from an off-the-shelf template database, or invoke a low-level generation module to generate a new sentence. HRGR-Agent is updated via reinforcement learning, guided by sentence-level and word-level rewards. Experiments show that our approach achieves state-of-the-art results on two medical report datasets, generating well-balanced structured sentences with robust coverage of heterogeneous medical report contents. 
In addition, our model achieves the highest detection precision of medical abnormality terminologies and improved human evaluation performance.", "bibtex": "@inproceedings{NEURIPS2018_e0741335,\n author = {Li, Yuan and Liang, Xiaodan and Hu, Zhiting and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hybrid Retrieval-Generation Reinforced Agent for Medical Image Report Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e07413354875be01a996dc560274708e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e07413354875be01a996dc560274708e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e07413354875be01a996dc560274708e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e07413354875be01a996dc560274708e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e07413354875be01a996dc560274708e-Reviews.html", "metareview": "", "pdf_size": 1281066, "gs_citation": 453, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17238049737650911334&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Duke University + Petuum, Inc; Carnegie Mellon University; Carnegie Mellon University; Petuum, Inc", "aff_domain": "duke.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "duke.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e07413354875be01a996dc560274708e-Abstract.html", "aff_unique_index": "0+1;2;2;1", "aff_unique_norm": "Duke University;Petuum, Inc;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duke.edu;https://www.petuum.com;https://www.cmu.edu", "aff_unique_abbr": "Duke;;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "United States" }, { "title": "Hybrid-MST: A Hybrid Active Sampling Strategy for Pairwise Preference Aggregation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11349", "id": "11349", "author_site": "JING LI, Rafal Mantiuk, Junle Wang, Suiyi Ling, Patrick Le Callet", "author": "JING LI; Rafal Mantiuk; Junle Wang; Suiyi Ling; Patrick Le Callet", "abstract": "In this paper we present a hybrid active sampling strategy for pairwise preference aggregation, which aims at recovering the underlying rating of the test candidates from sparse and noisy pairwise labeling. Our method employs a Bayesian optimization framework and the Bradley-Terry model to construct the utility function, and then to obtain the Expected Information Gain (EIG) of each pair. For computational efficiency, Gaussian-Hermite quadrature is used for estimation of EIG. In this work, a hybrid active sampling strategy is proposed, either using Global Maximum (GM) EIG sampling or Minimum Spanning Tree (MST) sampling in each trial, which is determined by the test budget. 
The proposed method has been validated on both simulated and real-world datasets, where it shows higher preference aggregation ability than the state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_8b6a80c3,\n author = {LI, JING and Mantiuk, Rafal and Wang, Junle and Ling, Suiyi and Le Callet, Patrick},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hybrid-MST: A Hybrid Active Sampling Strategy for Pairwise Preference Aggregation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8b6a80c3cf2cbd5f967063618dc54f39-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8b6a80c3cf2cbd5f967063618dc54f39-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8b6a80c3cf2cbd5f967063618dc54f39-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8b6a80c3cf2cbd5f967063618dc54f39-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8b6a80c3cf2cbd5f967063618dc54f39-Reviews.html", "metareview": "", "pdf_size": 1374507, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13558401002999071074&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8b6a80c3cf2cbd5f967063618dc54f39-Abstract.html" }, { "title": "Hyperbolic Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11522", "id": "11522", "author_site": "Octavian Ganea, Gary Becigneul, Thomas Hofmann", "author": "Octavian Ganea; Gary Becigneul; Thomas Hofmann", "abstract": "Hyperbolic spaces have recently gained momentum in the context of machine learning due to their high capacity and tree-likeness properties. However, the representational power of hyperbolic geometry is not yet on par with Euclidean geometry, firstly because of the absence of corresponding hyperbolic neural network layers. Here, we bridge this gap in a principled manner by combining the formalism of M\u00f6bius gyrovector spaces with the Riemannian geometry of the Poincar\u00e9 model of hyperbolic spaces. As a result, we derive hyperbolic versions of important deep learning tools: multinomial logistic regression, feed-forward and recurrent neural networks. This allows us to embed sequential data and perform classification in the hyperbolic space. Empirically, we show that, even if hyperbolic optimization tools are limited, hyperbolic sentence embeddings either outperform or are on par with their Euclidean variants on textual entailment and noisy-prefix recognition tasks.", "bibtex": "@inproceedings{NEURIPS2018_dbab2adc,\n author = {Ganea, Octavian and Becigneul, Gary and Hofmann, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hyperbolic Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dbab2adc8f9d078009ee3fa810bea142-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dbab2adc8f9d078009ee3fa810bea142-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dbab2adc8f9d078009ee3fa810bea142-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dbab2adc8f9d078009ee3fa810bea142-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dbab2adc8f9d078009ee3fa810bea142-Reviews.html", "metareview": "", "pdf_size": 677106, "gs_citation": 738, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12122146629122312177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Dept. of Computer Science, ETH Z\u00fcrich, Zurich, Switzerland; Dept. of Computer Science, ETH Z\u00fcrich, Zurich, Switzerland; Dept. of Computer Science, ETH Z\u00fcrich, Zurich, Switzerland", "aff_domain": "inf.ethz.ch;inf.ethz.ch; ", "email": "inf.ethz.ch;inf.ethz.ch; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dbab2adc8f9d078009ee3fa810bea142-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Dept. of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Zurich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Identification and Estimation of Causal Effects from Dependent Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11897", "id": "11897", "author_site": "Eli Sherman, Ilya Shpitser", "author": "Eli Sherman; Ilya Shpitser", "abstract": "The assumption that data samples are independent and identically distributed (iid) is standard in many areas of statistics and machine learning. Nevertheless, in some settings, such as social networks, infectious disease modeling, and reasoning with spatial and temporal data, this assumption is false. An extensive literature exists on making causal inferences under the iid assumption [12, 8, 21, 16], but, as pointed out in [14], causal inference in non-iid contexts is challenging due to the combination of unobserved confounding bias and data dependence. In this paper we develop a general theory describing when causal inferences are possible in such scenarios. We use segregated graphs [15], a generalization of latent projection mixed graphs [23], to represent causal models of this type and provide a complete algorithm for non-parametric identification in these models. We then demonstrate how statistical inferences may be performed on causal parameters identified by this algorithm, even in cases where parts of the model exhibit full interference, meaning only a single sample is available for parts of the model [19]. We apply these techniques to a synthetic data set which considers the adoption of fake news articles given the social network structure, articles read by each person, and baseline demographics and socioeconomic covariates.", "bibtex": "@inproceedings{NEURIPS2018_024677ef,\n author = {Sherman, Eli and Shpitser, Ilya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Identification and Estimation of Causal Effects from Dependent Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/024677efb8e4aee2eaeef17b54695bbe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/024677efb8e4aee2eaeef17b54695bbe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/024677efb8e4aee2eaeef17b54695bbe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/024677efb8e4aee2eaeef17b54695bbe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/024677efb8e4aee2eaeef17b54695bbe-Reviews.html", "metareview": "", "pdf_size": 398890, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1515242654355907439&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science, Johns Hopkins University; Department of Computer Science, Johns Hopkins University", "aff_domain": "jhu.edu;cs.jhu.edu", "email": "jhu.edu;cs.jhu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/024677efb8e4aee2eaeef17b54695bbe-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Image Inpainting via Generative Multi-column Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11058", "id": "11058", "author_site": "Yi Wang, Xin Tao, Xiaojuan Qi, Xiaoyong Shen, Jiaya Jia", "author": "Yi Wang; Xin Tao; Xiaojuan Qi; Xiaoyong Shen; Jiaya Jia", "abstract": "In this paper, we propose a generative multi-column network for image inpainting. This network synthesizes different image components in a parallel manner within one stage. To better characterize global structures, we design a confidence-driven reconstruction loss while an implicit diversified MRF regularization is adopted to enhance local details. The multi-column network combined with the reconstruction and MRF loss propagates local and global information derived from context to the target inpainting regions. Extensive experiments on challenging street view, face, natural objects and scenes demonstrate that our method produces visually compelling results even without previously common post-processing.", "bibtex": "@inproceedings{NEURIPS2018_6f3ef77a,\n author = {Wang, Yi and Tao, Xin and Qi, Xiaojuan and Shen, Xiaoyong and Jia, Jiaya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Image Inpainting via Generative Multi-column Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6f3ef77ac0e3619e98159e9b6febf557-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6f3ef77ac0e3619e98159e9b6febf557-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6f3ef77ac0e3619e98159e9b6febf557-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6f3ef77ac0e3619e98159e9b6febf557-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6f3ef77ac0e3619e98159e9b6febf557-Reviews.html", "metareview": "", "pdf_size": 5733238, "gs_citation": 437, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14919715529082387957&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The Chinese University of Hong Kong; The Chinese University of Hong Kong + YouTu Lab, Tencent; The Chinese University of Hong Kong; YouTu Lab, Tencent; The Chinese University of Hong Kong + YouTu Lab, Tencent", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;gmail.com;cse.cuhk.edu.hk", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;gmail.com;cse.cuhk.edu.hk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6f3ef77ac0e3619e98159e9b6febf557-Abstract.html", "aff_unique_index": "0;0+1;0;1;0+1", "aff_unique_norm": "Chinese University of Hong Kong;Tencent", "aff_unique_dep": ";YouTu Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.tencent.com", "aff_unique_abbr": "CUHK;Tencent", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0+0;0;0;0+0", "aff_country_unique": "China" }, { "title": "Image-to-image translation for cross-domain disentanglement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11146", "id": "11146", "author_site": "Abel Gonzalez-Garcia, Joost van de Weijer, Yoshua Bengio", "author": "Abel Gonzalez-Garcia; Joost van de Weijer; Yoshua Bengio", "abstract": "Deep image translation methods have recently shown excellent results, outputting high-quality images covering multiple modes of the data distribution. There has also been increased interest in disentangling the internal representations learned by deep methods to further improve their performance and achieve a finer control. In this paper, we bridge these two objectives and introduce the concept of cross-domain disentanglement. We aim to separate the internal representation into three parts. The shared part contains information for both domains. The exclusive parts, on the other hand, contain only factors of variation that are particular to each domain. We achieve this through bidirectional image translation based on Generative Adversarial Networks and cross-domain autoencoders, a novel network component. Our model offers multiple advantages. We can output diverse samples covering multiple modes of the distributions of both domains, perform domain-specific image transfer and interpolation, and cross-domain retrieval without the need for labeled data, only paired images. 
We compare our model to the state-of-the-art in multi-modal image translation and achieve better results for translation on challenging datasets as well as for cross-domain retrieval on realistic datasets.", "bibtex": "@inproceedings{NEURIPS2018_dc6a7071,\n author = {Gonzalez-Garcia, Abel and van de Weijer, Joost and Bengio, Yoshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Image-to-image translation for cross-domain disentanglement},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dc6a70712a252123c40d2adba6a11d84-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dc6a70712a252123c40d2adba6a11d84-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dc6a70712a252123c40d2adba6a11d84-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dc6a70712a252123c40d2adba6a11d84-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dc6a70712a252123c40d2adba6a11d84-Reviews.html", "metareview": "", "pdf_size": 11463717, "gs_citation": 487, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7146735712017629088&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Computer Vision Center; Computer Vision Center + Universitat Aut\u00f2noma de Barcelona; MILA + Universit\u00e9 de Montr\u00e9al", "aff_domain": "cvc.uab.es; ; ", "email": "cvc.uab.es; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dc6a70712a252123c40d2adba6a11d84-Abstract.html", "aff_unique_index": "0;0+1;2+3", "aff_unique_norm": "Computer Vision Center;Universitat Aut\u00f2noma de Barcelona;Mila;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cvc.uab.cat/;https://www.uab.cat;https://mila.quebec;https://www.umontreal.ca", "aff_unique_abbr": "CVC;UAB;MILA;UdeM", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;1+1", "aff_country_unique": "Spain;Canada" }, { "title": "Implicit Bias of Gradient Descent on Linear Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11900", "id": "11900", "author_site": "Suriya Gunasekar, Jason Lee, Daniel Soudry, Nati Srebro", "author": "Suriya Gunasekar; Jason Lee; Daniel Soudry; Nati Srebro", "abstract": "We show that gradient descent on full-width linear convolutional networks of depth $L$ converges to a linear predictor related to the $\\ell_{2/L}$ bridge penalty in the frequency domain. This is in contrast to linear fully connected networks, where gradient descent converges to the hard margin linear SVM solution, regardless of depth.", "bibtex": "@inproceedings{NEURIPS2018_0e98aeeb,\n author = {Gunasekar, Suriya and Lee, Jason D and Soudry, Daniel and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Implicit Bias of Gradient Descent on Linear Convolutional Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0e98aeeb54acf612b9eb4e48a269814c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0e98aeeb54acf612b9eb4e48a269814c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0e98aeeb54acf612b9eb4e48a269814c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0e98aeeb54acf612b9eb4e48a269814c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0e98aeeb54acf612b9eb4e48a269814c-Reviews.html", "metareview": "", "pdf_size": 529856, "gs_citation": 493, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15783072683810160180&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "TTI at Chicago, USA; USC Los Angeles, USA; Technion, Israel; TTI at Chicago, USA", "aff_domain": "ttic.edu;marshall.usc.edu;gmail.com;ttic.edu", "email": "ttic.edu;marshall.usc.edu;gmail.com;ttic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0e98aeeb54acf612b9eb4e48a269814c-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of Southern California;Technion - Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tti-chicago.org;https://www.usc.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "TTI;USC;Technion", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Chicago;Los Angeles;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "Implicit Probabilistic Integrators for ODEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11698", "id": "11698", "author_site": "Onur Teymur, Han Cheng Lie, Tim Sullivan, Ben Calderhead", "author": "Onur Teymur; Han Cheng Lie; Tim Sullivan; Ben Calderhead", "abstract": "We introduce a family of implicit probabilistic integrators for initial value problems (IVPs), taking as a starting point the multistep Adams\u2013Moulton method. The implicit construction allows for dynamic feedback from the forthcoming time-step, in contrast to previous probabilistic integrators, all of which are based on explicit methods. We begin with a concise survey of the rapidly-expanding field of probabilistic ODE solvers. We then introduce our method, which builds on and adapts the work of Conrad et al. (2016) and Teymur et al. (2016), and provide a rigorous proof of its well-definedness and convergence. We discuss the problem of the calibration of such integrators and suggest one approach. We give an illustrative example highlighting the effect of the use of probabilistic integrators\u2014including our new method\u2014in the setting of parameter inference within an inverse problem.", "bibtex": "@inproceedings{NEURIPS2018_228b2558,\n author = {Teymur, Onur and Lie, Han Cheng and Sullivan, Tim and Calderhead, Ben},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Implicit Probabilistic Integrators for ODEs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/228b25587479f2fc7570428e8bcbabdc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/228b25587479f2fc7570428e8bcbabdc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/228b25587479f2fc7570428e8bcbabdc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/228b25587479f2fc7570428e8bcbabdc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/228b25587479f2fc7570428e8bcbabdc-Reviews.html", "metareview": "", "pdf_size": 3929157, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11253951877712401188&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/228b25587479f2fc7570428e8bcbabdc-Abstract.html" }, { "title": "Implicit Reparameterization Gradients", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11068", "id": "11068", "author_site": "Mikhail Figurnov, Shakir Mohamed, Andriy Mnih", "author": "Mikhail Figurnov; Shakir Mohamed; Andriy Mnih", "abstract": "By providing a simple and efficient way of computing low-variance gradients of continuous random variables, the reparameterization trick has become the technique of choice for training a variety of latent variable models. However, it is not applicable to a number of important continuous distributions. We introduce an alternative approach to computing reparameterization gradients based on implicit differentiation and demonstrate its broader applicability by applying it to Gamma, Beta, Dirichlet, and von Mises distributions, which cannot be used with the classic reparameterization trick. Our experiments show that the proposed approach is faster and more accurate than the existing gradient estimators for these distributions.", "bibtex": "@inproceedings{NEURIPS2018_92c8c96e,\n author = {Figurnov, Mikhail and Mohamed, Shakir and Mnih, Andriy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Implicit Reparameterization Gradients},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/92c8c96e4c37100777c7190b76d28233-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/92c8c96e4c37100777c7190b76d28233-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/92c8c96e4c37100777c7190b76d28233-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/92c8c96e4c37100777c7190b76d28233-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/92c8c96e4c37100777c7190b76d28233-Reviews.html", "metareview": "", "pdf_size": 1174731, "gs_citation": 312, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2638066709160939372&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/92c8c96e4c37100777c7190b76d28233-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Importance Weighting and Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11441", "id": "11441", "author_site": "Justin Domke, Daniel Sheldon", "author": "Justin Domke; Daniel R. Sheldon", "abstract": "Recent work used importance sampling ideas for better variational bounds on likelihoods. We clarify the applicability of these ideas to pure probabilistic inference, by showing the resulting Importance Weighted Variational Inference (IWVI) technique is an instance of augmented variational inference, thus identifying the looseness in previous work. Experiments confirm IWVI's practicality for probabilistic inference. As a second contribution, we investigate inference with elliptical distributions, which improves accuracy in low dimensions, and convergence in high dimensions.", "bibtex": "@inproceedings{NEURIPS2018_25db67c5,\n author = {Domke, Justin and Sheldon, Daniel R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Importance Weighting and Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/25db67c5657914454081c6a18e93d6dd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/25db67c5657914454081c6a18e93d6dd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/25db67c5657914454081c6a18e93d6dd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/25db67c5657914454081c6a18e93d6dd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/25db67c5657914454081c6a18e93d6dd-Reviews.html", "metareview": "", "pdf_size": 2819165, "gs_citation": 144, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1257541268560546661&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst + Department of Computer Science, Mount Holyoke College", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/25db67c5657914454081c6a18e93d6dd-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "University of Massachusetts Amherst;Mount Holyoke College", "aff_unique_dep": "College of Information and Computer Sciences;Department of Computer Science", "aff_unique_url": "https://www.umass.edu;https://www.mtholyoke.edu", "aff_unique_abbr": "UMass Amherst;MHC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United States" }, { "title": "Improved Algorithms for Collaborative PAC Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11733", "id": "11733", "author_site": "Huy Nguyen, Lydia Zakynthinou", "author": "Huy Nguyen; Lydia Zakynthinou", "abstract": "We study a recent model of collaborative PAC learning where $k$ players with $k$ different tasks collaborate to learn a single classifier that works for all tasks. Previous work showed that when there is a classifier that has very small error on all tasks, there is a collaborative algorithm that finds a single classifier for all tasks and has $O((\\ln (k))^2)$ times the worst-case sample complexity for learning a single task. In this work, we design new algorithms for both the realizable and the non-realizable setting, having sample complexity only $O(\\ln (k))$ times the worst-case sample complexity for learning a single task. The sample complexity upper bounds of our algorithms match previous lower bounds and in some range of parameters are even better than previous algorithms that are allowed to output different classifiers for different tasks.", "bibtex": "@inproceedings{NEURIPS2018_3569df15,\n author = {Nguyen, Huy and Zakynthinou, Lydia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Algorithms for Collaborative PAC Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3569df159ec477451530c4455b2a9e86-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3569df159ec477451530c4455b2a9e86-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3569df159ec477451530c4455b2a9e86-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3569df159ec477451530c4455b2a9e86-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3569df159ec477451530c4455b2a9e86-Reviews.html", "metareview": "", "pdf_size": 276322, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1175188818285989172&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "College of Computer and Information Science, Northeastern University; College of Computer and Information Science, Northeastern University", "aff_domain": "northeastern.edu;northeastern.edu", "email": "northeastern.edu;northeastern.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3569df159ec477451530c4455b2a9e86-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "College of Computer and Information Science", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Improved Expressivity Through Dendritic Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11772", "id": "11772", "author_site": "Xundong Wu, Xiangwen Liu, Wei Li, Qing Wu", "author": "Xundong Wu; Xiangwen Liu; Wei Li; Qing Wu", "abstract": "A typical biological neuron, such as a pyramidal neuron of the neocortex, receives thousands of afferent synaptic inputs on its dendrite tree and sends the efferent axonal output downstream. In typical artificial neural networks, dendrite trees are modeled as linear structures that funnel weighted synaptic inputs to the cell bodies. However, numerous experimental and theoretical studies have shown that dendritic arbors are far more than simple linear accumulators. That is, synaptic inputs can actively modulate their neighboring synaptic activities; therefore, the dendritic structures are highly nonlinear. In this study, we model such local nonlinearity of dendritic trees with our dendritic neural network (DENN) structure and apply this structure to typical machine learning tasks. Equipped with localized nonlinearities, DENNs can attain greater model expressivity than regular neural networks while maintaining efficient network inference. Such strength is evidenced by the increased fitting power when we train DENNs with supervised machine learning tasks. We also empirically show that the locality structure can improve the generalization performance of DENNs, as exemplified by DENNs outranking naive deep neural network architectures when tested on 121 classification tasks from the UCI machine learning repository.", "bibtex": "@inproceedings{NEURIPS2018_e32c51ad,\n author = {Wu, Xundong and Liu, Xiangwen and Li, Wei and Wu, Qing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Expressivity Through Dendritic Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e32c51ad39723ee92b285b362c916ca7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e32c51ad39723ee92b285b362c916ca7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e32c51ad39723ee92b285b362c916ca7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e32c51ad39723ee92b285b362c916ca7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e32c51ad39723ee92b285b362c916ca7-Reviews.html", "metareview": "", "pdf_size": 433011, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3202472222523327804&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China; School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China; School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China; School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China", "aff_domain": "gmail.com; ; ;hdu.edu.cn", "email": "gmail.com; ; ;hdu.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e32c51ad39723ee92b285b362c916ca7-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hangzhou Dianzi University", "aff_unique_dep": "School of Computer Science and Technology", "aff_unique_url": "http://www.hdu.edu.cn/", "aff_unique_abbr": "HDU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hangzhou", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Improved Network Robustness with Adversary Critic", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12000", "id": "12000", "author_site": "Alexander Matyasko, Lap-Pui Chau", "author": "Alexander Matyasko; Lap-Pui Chau", "abstract": "Ideally, what confuses a neural network should also be confusing to humans. However, recent experiments have shown that small, imperceptible perturbations can change the network prediction. To address this gap in perception, we propose a novel approach for learning a robust classifier. Our main idea is: adversarial examples for the robust classifier should be indistinguishable from the regular data of the adversarial target. We formulate the problem of learning a robust classifier in the framework of Generative Adversarial Networks (GAN), where the adversarial attack on the classifier acts as a generator, and the critic network learns to distinguish between regular and adversarial images. The classifier cost is augmented with the objective that its adversarial examples should confuse the adversary critic. To improve the stability of the adversarial mapping, we introduce an adversarial cycle-consistency constraint which ensures that the adversarial mapping of the adversarial examples is close to the original. In the experiments, we show the effectiveness of our defense. In terms of robustness, our method surpasses networks trained with adversarial training. 
Additionally, we verify in the experiments with human annotators on MTurk that adversarial examples are indeed visually confusing.", "bibtex": "@inproceedings{NEURIPS2018_f77ecc17,\n author = {Matyasko, Alexander and Chau, Lap-Pui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Network Robustness with Adversary Critic},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f77ecc17109b1b806350eb7e7bbfd861-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f77ecc17109b1b806350eb7e7bbfd861-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f77ecc17109b1b806350eb7e7bbfd861-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f77ecc17109b1b806350eb7e7bbfd861-Reviews.html", "metareview": "", "pdf_size": 583429, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4193325299886417643&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore; School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore", "aff_domain": "ntu.edu.sg;ntu.edu.sg", "email": "ntu.edu.sg;ntu.edu.sg", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f77ecc17109b1b806350eb7e7bbfd861-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "School of Electrical and Electronic Engineering", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Singapore", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "Improving Explorability in Variational Inference with Annealed Variational Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11922", "id": "11922", "author_site": "Chin-Wei Huang, Shawn Tan, Alexandre Lacoste, Aaron Courville", "author": "Chin-Wei Huang; Shawn Tan; Alexandre Lacoste; Aaron C. Courville", "abstract": "Despite the advances in the representational capacity of approximate distributions for variational inference, the optimization process can still limit the density that is ultimately learned.\nWe demonstrate the drawbacks of biasing the true posterior to be unimodal, and introduce Annealed Variational Objectives (AVO) into the training of hierarchical variational methods.\nInspired by Annealed Importance Sampling, the proposed method facilitates learning by incorporating energy tempering into the optimization objective.\nIn our experiments, we demonstrate our method's robustness to deterministic warm up, and the benefits of encouraging exploration in the latent space.", "bibtex": "@inproceedings{NEURIPS2018_65b0df23,\n author = {Huang, Chin-Wei and Tan, Shawn and Lacoste, Alexandre and Courville, Aaron C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving Explorability in Variational Inference with Annealed Variational Objectives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/65b0df23fd2d449ae1e4b2d27151d73b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/65b0df23fd2d449ae1e4b2d27151d73b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/65b0df23fd2d449ae1e4b2d27151d73b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/65b0df23fd2d449ae1e4b2d27151d73b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/65b0df23fd2d449ae1e4b2d27151d73b-Reviews.html", "metareview": "", "pdf_size": 902289, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5674269148497531231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "MILA, University of Montreal+Element AI+CIFAR Fellow; MILA, University of Montreal; Element AI; MILA, University of Montreal+CIFAR Fellow", "aff_domain": "umontreal.ca;umontreal.ca;elementai.com;umontreal.ca", "email": "umontreal.ca;umontreal.ca;elementai.com;umontreal.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/65b0df23fd2d449ae1e4b2d27151d73b-Abstract.html", "aff_unique_index": "0+1+2;0;1;0+2", "aff_unique_norm": "University of Montreal;Element AI;CIFAR", "aff_unique_dep": "MILA;;", "aff_unique_url": "https://www.mila.quebec;https://www.elementai.com;https://www.cifar.ca", "aff_unique_abbr": "MILA;Element AI;CIFAR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0+0+0;0;0;0+0", "aff_country_unique": "Canada" }, { "title": "Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty-Seeking Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11492", "id": "11492", "author_site": "Edoardo Conti, Vashisht Madhavan, Felipe Petroski Such, Joel Lehman, Kenneth Stanley, Jeff Clune", "author": "Edoardo Conti; Vashisht Madhavan; Felipe Petroski Such; Joel Lehman; Kenneth Stanley; Jeff Clune", "abstract": "Evolution strategies (ES) are a family of black-box optimization algorithms able to train deep neural networks roughly as well as Q-learning and policy gradient methods on challenging deep reinforcement learning (RL) problems, but are much faster (e.g. hours vs. days) because they parallelize better. However, many RL problems require directed exploration because they have reward functions that are sparse or deceptive (i.e. contain local optima), and it is unknown how to encourage such exploration with ES. Here we show that algorithms that have been invented to promote directed exploration in small-scale evolved neural networks via populations of exploring agents, specifically novelty search (NS) and quality diversity (QD) algorithms, can be hybridized with ES to improve its performance on sparse or deceptive deep RL tasks, while retaining scalability. Our experiments confirm that the resultant new algorithms, NS-ES and two QD algorithms, NSR-ES and NSRA-ES, avoid local optima encountered by ES to achieve higher performance on Atari and simulated robots learning to walk around a deceptive trap. This paper thus introduces a family of fast, scalable algorithms for reinforcement learning that are capable of directed exploration. 
It also adds this new family of exploration algorithms to the RL toolbox and raises the interesting possibility that analogous algorithms with multiple simultaneous paths of exploration might also combine well with existing RL algorithms outside ES.", "bibtex": "@inproceedings{NEURIPS2018_b1301141,\n author = {Conti, Edoardo and Madhavan, Vashisht and Petroski Such, Felipe and Lehman, Joel and Stanley, Kenneth and Clune, Jeff},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty-Seeking Agents},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b1301141feffabac455e1f90a7de2054-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b1301141feffabac455e1f90a7de2054-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b1301141feffabac455e1f90a7de2054-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b1301141feffabac455e1f90a7de2054-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b1301141feffabac455e1f90a7de2054-Reviews.html", "metareview": "", "pdf_size": 1523362, "gs_citation": 449, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9461747331584701646&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Uber AI Labs; Uber AI Labs; Uber AI Labs; Uber AI Labs; Uber AI Labs; Uber AI Labs", "aff_domain": "gmail.com;uber.com; ; ; ; ", "email": "gmail.com;uber.com; ; ; ; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b1301141feffabac455e1f90a7de2054-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Uber", "aff_unique_dep": "Uber AI Labs", "aff_unique_url": "https://www.uber.com", "aff_unique_abbr": "Uber AI Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Neural Program Synthesis with Inferred Execution Traces", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11851", "id": "11851", "author_site": "Richard Shin, Illia Polosukhin, Dawn Song", "author": "Eui Chul Shin; Illia Polosukhin; Dawn Song", "abstract": "The task of program synthesis, or automatically generating programs that are consistent with a provided specification, remains a challenging task in artificial intelligence. As in other fields of AI, deep learning-based end-to-end approaches have made great advances in program synthesis. However, more so than other fields such as computer vision, program synthesis provides greater opportunities to explicitly exploit structured information such as execution traces, which contain a superset of the information in input/output pairs. While execution traces are highly useful for program synthesis, they are more difficult to obtain than input/output pairs, so we use the insight that we can split the process into two parts: infer the trace from the input/output example, then infer the program from the trace.
This simple modification leads to state-of-the-art results in program synthesis in the Karel domain, improving accuracy to 81.3% from the 77.12% of prior work.", "bibtex": "@inproceedings{NEURIPS2018_7776e88b,\n author = {Shin, Eui Chul and Polosukhin, Illia and Song, Dawn},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving Neural Program Synthesis with Inferred Execution Traces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7776e88b0c189539098176589250bcba-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7776e88b0c189539098176589250bcba-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7776e88b0c189539098176589250bcba-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7776e88b0c189539098176589250bcba-Reviews.html", "metareview": "", "pdf_size": 401290, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13178411963257726290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "UC Berkeley+NEAR Protocol; NEAR Protocol; UC Berkeley", "aff_domain": "cs.berkeley.edu;nearprotocol.com;cs.berkeley.edu", "email": "cs.berkeley.edu;nearprotocol.com;cs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7776e88b0c189539098176589250bcba-Abstract.html", "aff_unique_index": "0+1;1;0", "aff_unique_norm": "University of California, Berkeley;NEAR Protocol", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://near.org", "aff_unique_abbr": "UC Berkeley;NEAR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0+1;1;0", "aff_country_unique": "United States;Unknown" }, { "title": "Improving Online Algorithms via ML Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11918", "id": "11918", "author_site": "Manish Purohit, Zoya Svitkina, Ravi Kumar", "author": "Manish Purohit; Zoya Svitkina; Ravi Kumar", "abstract": "In this work we study the problem of using machine-learned predictions to improve performance of online algorithms. We consider two classical problems, ski rental and non-clairvoyant job scheduling, and obtain new online algorithms that use predictions to make their decisions. These algorithms are oblivious to the performance of the predictor, improve with better predictions, but do not degrade much if the predictions are poor.", "bibtex": "@inproceedings{NEURIPS2018_73a427ba,\n author = {Purohit, Manish and Svitkina, Zoya and Kumar, Ravi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving Online Algorithms via ML Predictions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/73a427badebe0e32caa2e1fc7530b7f3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/73a427badebe0e32caa2e1fc7530b7f3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/73a427badebe0e32caa2e1fc7530b7f3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/73a427badebe0e32caa2e1fc7530b7f3-Reviews.html", "metareview": "", "pdf_size": 347331, "gs_citation": 363, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16315231422991377566&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff": "Google; Google; Google", "aff_domain": "gmail.com;google.com;cs.cornell.edu", "email": "gmail.com;google.com;cs.cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/73a427badebe0e32caa2e1fc7530b7f3-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improving Simple Models with Confidence Profiles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11974", "id": "11974", "author_site": "Amit Dhurandhar, Karthikeyan Shanmugam, Ronny Luss, Peder A Olsen", "author": "Amit Dhurandhar; Karthikeyan Shanmugam; Ronny Luss; Peder A. Olsen", "abstract": "In this paper, we propose a new method called ProfWeight for transferring information from a pre-trained deep neural network that has a high test accuracy to a simpler interpretable model or a very shallow network of low complexity and a priori low test accuracy. We are motivated by applications in interpretability and model deployment in severely memory constrained environments (like sensors). Our method uses linear probes to generate confidence scores through flattened intermediate representations. Our transfer method involves a theoretically justified weighting of samples during the training of the simple model using confidence scores of these intermediate layers. The value of our method is first demonstrated on CIFAR-10, where our weighting method significantly improves (3-4\\%) networks with only a fraction of the number of Resnet blocks of a complex Resnet model. We further demonstrate operationally significant results on a real manufacturing problem, where we dramatically increase the test accuracy of a CART model (the domain standard) by roughly $13\\%$.", "bibtex": "@inproceedings{NEURIPS2018_972cda1e,\n author = {Dhurandhar, Amit and Shanmugam, Karthikeyan and Luss, Ronny and Olsen, Peder A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving Simple Models with Confidence Profiles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/972cda1e62b72640cb7ac702714a115f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/972cda1e62b72640cb7ac702714a115f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/972cda1e62b72640cb7ac702714a115f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/972cda1e62b72640cb7ac702714a115f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/972cda1e62b72640cb7ac702714a115f-Reviews.html", "metareview": "", "pdf_size": 358196, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3751749203077156755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "IBM Research, Yorktown Heights, NY; IBM Research, Yorktown Heights, NY; IBM Research, Yorktown Heights, NY; IBM Research, Yorktown Heights, NY", "aff_domain": "us.ibm.com;ibm.com;us.ibm.com;us.ibm.com", "email": "us.ibm.com;ibm.com;us.ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/972cda1e62b72640cb7ac702714a115f-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Yorktown Heights", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Incorporating Context into Language Encoding Models for fMRI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11640", "id": "11640", "author_site": "Shailee Jain, Alexander Huth", "author": "Shailee Jain; Alexander Huth", "abstract": "Language encoding models help explain language processing in the human brain by learning functions that predict brain responses from the language stimuli that elicited them. Current word embedding-based approaches treat each stimulus word independently and thus ignore the influence of context on language understanding. In this work we instead build encoding models using rich contextual representations derived from an LSTM language model. Our models show a significant improvement in encoding performance relative to state-of-the-art embeddings in nearly every brain area. By varying the amount of context used in the models and providing the models with distorted context, we show that this improvement is due to a combination of better word embeddings learned by the LSTM language model and contextual information. We are also able to use our models to map context sensitivity across the cortex. These results suggest that LSTM language models learn high-level representations that are related to representations in the human brain.", "bibtex": "@inproceedings{NEURIPS2018_f471223d,\n author = {Jain, Shailee and Huth, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Incorporating Context into Language Encoding Models for fMRI},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f471223d1a1614b58a7dc45c9d01df19-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f471223d1a1614b58a7dc45c9d01df19-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f471223d1a1614b58a7dc45c9d01df19-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f471223d1a1614b58a7dc45c9d01df19-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f471223d1a1614b58a7dc45c9d01df19-Reviews.html", "metareview": "", "pdf_size": 18899122, "gs_citation": 240, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16680485027101599625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Departments of Computer Science & Neuroscience, The University of Texas at Austin; Departments of Computer Science & Neuroscience, The University of Texas at Austin", "aff_domain": "cs.utexas.edu;cs.utexas.edu", "email": "cs.utexas.edu;cs.utexas.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f471223d1a1614b58a7dc45c9d01df19-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "Departments of Computer Science & Neuroscience", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Inequity aversion improves cooperation in intertemporal social dilemmas", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11335", "id": "11335", "author_site": "Edward Hughes, Joel Leibo, Matthew Phillips, Karl Tuyls, Edgar Due\u00f1ez-Guzman, Antonio Garc\u00eda Casta\u00f1eda, Iain Dunning, Tina Zhu, Kevin McKee, Raphael Koster, Heather Roff, Thore Graepel", "author": "Edward Hughes; Joel Z. Leibo; Matthew Phillips; Karl Tuyls; Edgar Due\u00f1ez-Guzman; Antonio Garc\u00eda Casta\u00f1eda; Iain Dunning; Tina Zhu; Kevin McKee; Raphael Koster; Heather Roff; Thore Graepel", "abstract": "Groups of humans are often able to find ways to cooperate with one another in complex, temporally extended social dilemmas. Models based on behavioral economics are only able to explain this phenomenon for unrealistic stateless matrix games. Recently, multi-agent reinforcement learning has been applied to generalize social dilemma problems to temporally and spatially extended Markov games. However, this has not yet generated an agent that learns to cooperate in social dilemmas as humans do. A key insight is that many, but not all, human individuals have inequity averse social preferences. This promotes a particular resolution of the matrix game social dilemma wherein inequity-averse individuals are personally pro-social and punish defectors. Here we extend this idea to Markov games and show that it promotes cooperation in several types of sequential social dilemma, via a profitable interaction with policy learnability. In particular, we find that inequity aversion improves temporal credit assignment for the important class of intertemporal social dilemmas. 
These results help explain how large-scale cooperation may emerge and persist.", "bibtex": "@inproceedings{NEURIPS2018_7fea637f,\n author = {Hughes, Edward and Leibo, Joel Z and Phillips, Matthew and Tuyls, Karl and Due\\~{n}ez-Guzman, Edgar and Garc\\'{\\i}a Casta\\~{n}eda, Antonio and Dunning, Iain and Zhu, Tina and McKee, Kevin and Koster, Raphael and Roff, Heather and Graepel, Thore},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inequity aversion improves cooperation in intertemporal social dilemmas},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7fea637fd6d02b8f0adf6f7dc36aed93-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7fea637fd6d02b8f0adf6f7dc36aed93-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7fea637fd6d02b8f0adf6f7dc36aed93-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7fea637fd6d02b8f0adf6f7dc36aed93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7fea637fd6d02b8f0adf6f7dc36aed93-Reviews.html", "metareview": "", "pdf_size": 1845716, "gs_citation": 296, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14342622231580539116&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "aff": "DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom; DeepMind, London, United Kingdom", "aff_domain": "google.com;google.com;ucl.ac.uk;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;ucl.ac.uk;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 12, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7fea637fd6d02b8f0adf6f7dc36aed93-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Inexact trust-region algorithms on Riemannian manifolds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11421", "id": "11421", "author_site": "Hiroyuki Kasai, Bamdev Mishra", "author": "Hiroyuki Kasai; Bamdev Mishra", "abstract": "We consider an inexact variant of the popular Riemannian trust-region algorithm for structured big-data minimization problems. The proposed algorithm approximates the gradient and the Hessian in addition to the solution of a trust-region sub-problem. Addressing large-scale finite-sum problems, we specifically propose sub-sampled algorithms with a fixed bound on sub-sampled Hessian and gradient sizes, where the gradient and Hessian are computed by a random sampling technique. 
Numerical evaluations demonstrate that the proposed algorithms outperform state-of-the-art Riemannian deterministic and stochastic gradient algorithms across different applications.", "bibtex": "@inproceedings{NEURIPS2018_3e9e39fe,\n author = {Kasai, Hiroyuki and Mishra, Bamdev},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inexact trust-region algorithms on Riemannian manifolds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3e9e39fed3b8369ed940f52cf300cf88-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3e9e39fed3b8369ed940f52cf300cf88-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3e9e39fed3b8369ed940f52cf300cf88-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3e9e39fed3b8369ed940f52cf300cf88-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3e9e39fed3b8369ed940f52cf300cf88-Reviews.html", "metareview": "", "pdf_size": 384430, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=197435474681214281&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The University of Electro-Communications, Japan; Microsoft, India", "aff_domain": "is.uec.ac.jp;microsoft.com", "email": "is.uec.ac.jp;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3e9e39fed3b8369ed940f52cf300cf88-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Electro-Communications;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.uec.ac.jp;https://www.microsoft.com/en-in", "aff_unique_abbr": "UEC;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;India" }, { "title": "Inference Aided Reinforcement Learning for Incentive Mechanism Design in Crowdsourcing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11538", "id": "11538", "author_site": "Zehong Hu, Yitao Liang, Jie Zhang, Zhao Li, Yang Liu", "author": "Zehong Hu; Yitao Liang; Jie Zhang; Zhao Li; Yang Liu", "abstract": "Incentive mechanisms for crowdsourcing are designed to incentivize financially self-interested workers to generate and report high-quality labels. Existing mechanisms are often developed as one-shot static solutions, assuming a certain level of knowledge about worker models (expertise levels, costs for exerting efforts, etc.). In this paper, we propose a novel inference aided reinforcement mechanism that acquires data sequentially and requires no such prior assumptions. Specifically, we first design a Gibbs sampling augmented Bayesian inference algorithm to estimate workers' labeling strategies from the collected labels at each step. Then we propose a reinforcement incentive learning (RIL) method, building on top of the above estimates, to uncover how workers respond to different payments. RIL dynamically determines the payment without accessing any ground-truth labels. We theoretically prove that RIL is able to incentivize rational workers to provide high-quality labels both at each step and in the long run. 
Empirical results show that our mechanism performs consistently well under both rational and non-fully rational (adaptive learning) worker models. Besides, the payments offered by RIL are more robust and have lower variances compared to existing one-shot mechanisms.", "bibtex": "@inproceedings{NEURIPS2018_f2e43fa3,\n author = {Hu, Zehong and Liang, Yitao and Zhang, Jie and Li, Zhao and Liu, Yang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inference Aided Reinforcement Learning for Incentive Mechanism Design in Crowdsourcing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f2e43fa3400d826df4195a9ac70dca62-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f2e43fa3400d826df4195a9ac70dca62-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f2e43fa3400d826df4195a9ac70dca62-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f2e43fa3400d826df4195a9ac70dca62-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f2e43fa3400d826df4195a9ac70dca62-Reviews.html", "metareview": "", "pdf_size": 731364, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15573318561468756179&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Alibaba Group, Hangzhou, China; University of California, Los Angeles; Nanyang Technological University; Alibaba Group, Hangzhou, China; University of California, Santa Cruz/Harvard University", "aff_domain": "e.ntu.edu.sg;cs.ucla.edu;ntu.edu.sg;alibaba-inc.com;ucsc.edu", "email": "e.ntu.edu.sg;cs.ucla.edu;ntu.edu.sg;alibaba-inc.com;ucsc.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f2e43fa3400d826df4195a9ac70dca62-Abstract.html", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Alibaba Group;University of California, Los Angeles;Nanyang Technological University;University of California, Santa Cruz", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.alibaba.com;https://www.ucla.edu;https://www.ntu.edu.sg;https://www.ucsc.edu", "aff_unique_abbr": "Alibaba;UCLA;NTU;UCSC", "aff_campus_unique_index": "0;1;0;3", "aff_campus_unique": "Hangzhou;Los Angeles;;Santa Cruz", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "China;United States;Singapore" }, { "title": "Inference in Deep Gaussian Processes using Stochastic Gradient Hamiltonian Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11722", "id": "11722", "author_site": "Marton Havasi, Jos\u00e9 Miguel Hern\u00e1ndez-Lobato, Juan J. Murillo-Fuentes", "author": "Marton Havasi; Jos\u00e9 Miguel Hern\u00e1ndez-Lobato; Juan Jos\u00e9 Murillo-Fuentes", "abstract": "Deep Gaussian Processes (DGPs) are hierarchical generalizations of Gaussian Processes that combine well calibrated uncertainty estimates with the high flexibility of multilayer models. One of the biggest challenges with these models is that exact inference is intractable. The current state-of-the-art inference method, Variational Inference (VI), employs a Gaussian approximation to the posterior distribution. This can be a potentially poor unimodal approximation of the generally multimodal posterior. 
In this work, we provide evidence for the non-Gaussian nature of the posterior and we apply the Stochastic Gradient Hamiltonian Monte Carlo method to generate samples. To efficiently optimize the hyperparameters, we introduce the Moving Window MCEM algorithm. This results in significantly better predictions at a lower computational cost than its VI counterpart. Thus our method establishes a new state-of-the-art for inference in DGPs.", "bibtex": "@inproceedings{NEURIPS2018_4172f310,\n author = {Havasi, Marton and Hern\\'{a}ndez-Lobato, Jos\\'{e} Miguel and Murillo-Fuentes, Juan Jos\\'{e}},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inference in Deep Gaussian Processes using Stochastic Gradient Hamiltonian Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4172f3101212a2009c74b547b6ddf935-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4172f3101212a2009c74b547b6ddf935-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4172f3101212a2009c74b547b6ddf935-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4172f3101212a2009c74b547b6ddf935-Reviews.html", "metareview": "", "pdf_size": 1250968, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3764755113585298283&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Engineering, University of Cambridge; Department of Engineering, University of Cambridge + Microsoft Research + Alan Turing Institute; Department of Signal Theory and Communications, University of Sevilla", "aff_domain": "cam.ac.uk;cam.ac.uk;us.es", "email": "cam.ac.uk;cam.ac.uk;us.es", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4172f3101212a2009c74b547b6ddf935-Abstract.html", "aff_unique_index": "0;0+1+2;3", "aff_unique_norm": "University of Cambridge;Microsoft;Alan Turing Institute;University of Sevilla", "aff_unique_dep": "Department of Engineering;Microsoft Research;;Department of Signal Theory and Communications", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com/en-us/research;https://www.turing.ac.uk;https://www.us.es", "aff_unique_abbr": "Cambridge;MSR;ATI;US", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0+1+0;2", "aff_country_unique": "United Kingdom;United States;Spain" }, { "title": "Inferring Latent Velocities from Weather Radar Data using Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11857", "id": "11857", "author_site": "Rico Angell, Daniel Sheldon", "author": "Rico Angell; Daniel R. Sheldon", "abstract": "Archived data from the US network of weather radars hold detailed information about bird migration over the last 25 years, including very high-resolution partial measurements of velocity. Historically, most of this spatial resolution is discarded and velocities are summarized at a very small number of locations due to modeling and algorithmic limitations. This paper presents a Gaussian process (GP) model to reconstruct high-resolution full velocity fields across the entire US. 
The GP faithfully models all aspects of the problem in a single joint framework, including spatially random velocities, partial velocity measurements, station-specific geometries, measurement noise, and an ambiguity known as aliasing. We develop fast inference algorithms based on the FFT; to do so, we employ a creative use of Laplace's method to sidestep the fact that the kernel of the joint process is non-stationary.", "bibtex": "@inproceedings{NEURIPS2018_23451391,\n author = {Angell, Rico and Sheldon, Daniel R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inferring Latent Velocities from Weather Radar Data using Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/23451391cd1399019fa0421129066bc6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/23451391cd1399019fa0421129066bc6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/23451391cd1399019fa0421129066bc6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/23451391cd1399019fa0421129066bc6-Reviews.html", "metareview": "", "pdf_size": 3633270, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9714876415234145032&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "University of Massachusetts Amherst; University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/23451391cd1399019fa0421129066bc6-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Inferring Networks From Random Walk-Based Node Similarities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11370", "id": "11370", "author_site": "Jeremy Hoskins, Cameron Musco, Christopher Musco, Charalampos Tsourakakis", "author": "Jeremy Hoskins; Cameron Musco; Christopher Musco; Babis Tsourakakis", "abstract": "Digital presence in the world of online social media entails significant privacy risks. In this work we consider a privacy threat to a social network in which an attacker has access to a subset of random walk-based node similarities, such as effective resistances (i.e., commute times) or personalized PageRank scores. Using these similarities, the attacker seeks to infer as much information as possible about the network, including unknown pairwise node similarities and edges.", "bibtex": "@inproceedings{NEURIPS2018_2f25f6e3,\n author = {Hoskins, Jeremy and Musco, Cameron and Musco, Christopher and Tsourakakis, Babis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inferring Networks From Random Walk-Based Node Similarities},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2f25f6e326adb93c5787175dda209ab6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2f25f6e326adb93c5787175dda209ab6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2f25f6e326adb93c5787175dda209ab6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2f25f6e326adb93c5787175dda209ab6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2f25f6e326adb93c5787175dda209ab6-Reviews.html", "metareview": "", "pdf_size": 3566070, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4035487172765819261&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Mathematics, Yale University; Microsoft Research; Department of Computer Science, Princeton University; Department of Computer Science, Boston University + Harvard University", "aff_domain": "yale.edu;microsoft.com;cs.princeton.edu;bu.edu", "email": "yale.edu;microsoft.com;cs.princeton.edu;bu.edu", "github": "https://github.com/cnmusco/graph-similarity-learning", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2f25f6e326adb93c5787175dda209ab6-Abstract.html", "aff_unique_index": "0;1;2;3+4", "aff_unique_norm": "Yale University;Microsoft;Princeton University;Boston University;Harvard University", "aff_unique_dep": "Department of Mathematics;Microsoft Research;Department of Computer Science;Department of Computer Science;", "aff_unique_url": "https://www.yale.edu;https://www.microsoft.com/en-us/research;https://www.princeton.edu;https://www.bu.edu;https://www.harvard.edu", "aff_unique_abbr": "Yale;MSR;Princeton;BU;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Infinite-Horizon Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11350", "id": "11350", "author_site": "Arno Solin, James Hensman, Richard Turner", "author": "Arno Solin; James Hensman; Richard E Turner", "abstract": "Gaussian processes provide a flexible framework for forecasting, removing noise, and interpreting long temporal datasets. State space modelling (Kalman filtering) enables these non-parametric models to be deployed on long datasets by reducing the complexity to linear in the number of data points. The complexity is still cubic in the state dimension m which is an impediment to practical application. In certain special cases (Gaussian likelihood, regular spacing) the GP posterior will reach a steady posterior state when the data are very long. We leverage this and formulate an inference scheme for GPs with general likelihoods, where inference is based on single-sweep EP (assumed density filtering). The infinite-horizon model tackles the cubic cost in the state dimensionality and reduces the cost in the state dimension m to O(m^2) per data point. The model is extended to online-learning of hyperparameters. 
We show examples for large finite-length modelling problems, and present how the method runs in real-time on a smartphone on a continuous data stream updated at 100 Hz.", "bibtex": "@inproceedings{NEURIPS2018_b865367f,\n author = {Solin, Arno and Hensman, James and Turner, Richard E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Infinite-Horizon Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b865367fc4c0845c0682bd466e6ebf4c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b865367fc4c0845c0682bd466e6ebf4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b865367fc4c0845c0682bd466e6ebf4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b865367fc4c0845c0682bd466e6ebf4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b865367fc4c0845c0682bd466e6ebf4c-Reviews.html", "metareview": "", "pdf_size": 1603791, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13722784833220822191&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Aalto University; PROWLER.io; University of Cambridge", "aff_domain": "aalto.fi;prowler.io;cam.ac.uk", "email": "aalto.fi;prowler.io;cam.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b865367fc4c0845c0682bd466e6ebf4c-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Aalto University;PROWLER.io;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aalto.fi;https://prowler.io;https://www.cam.ac.uk", "aff_unique_abbr": "Aalto;PROWLER.io;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Finland;United Kingdom" }, { "title": "Information Constraints on Auto-Encoding Variational Bayes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11593", "id": "11593", "author_site": "Romain Lopez, Jeffrey Regier, Michael Jordan, Nir Yosef", "author": "Romain Lopez; Jeffrey Regier; Michael I Jordan; Nir Yosef", "abstract": "Parameterizing the approximate posterior of a generative model with neural networks has become a common theme in recent machine learning research. While providing appealing flexibility, this approach makes it difficult to impose or assess structural constraints such as conditional independence. We propose a framework for learning representations that relies on Auto-Encoding Variational Bayes and whose search space is constrained via kernel-based measures of independence. In particular, our method employs the $d$-variable Hilbert-Schmidt Independence Criterion (dHSIC) to enforce independence between the latent representations and arbitrary nuisance factors.\nWe show how to apply this method to a range of problems, including the problems of learning invariant representations and the learning of interpretable representations. We also present a full-fledged application to single-cell RNA sequencing (scRNA-seq). In this setting the biological signal is mixed in complex ways with sequencing errors and sampling effects.
We show that our method out-performs the state-of-the-art in this domain.", "bibtex": "@inproceedings{NEURIPS2018_9a96a2c7,\n author = {Lopez, Romain and Regier, Jeffrey and Jordan, Michael I and Yosef, Nir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Information Constraints on Auto-Encoding Variational Bayes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9a96a2c73c0d477ff2a6da3bf538f4f4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9a96a2c73c0d477ff2a6da3bf538f4f4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9a96a2c73c0d477ff2a6da3bf538f4f4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9a96a2c73c0d477ff2a6da3bf538f4f4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9a96a2c73c0d477ff2a6da3bf538f4f4-Reviews.html", "metareview": "", "pdf_size": 1195160, "gs_citation": 172, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11454932820167233817&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Electrical Engineering and Computer Sciences, University of California, Berkeley; Department of Electrical Engineering and Computer Sciences, University of California, Berkeley; Department of Electrical Engineering and Computer Sciences, University of California, Berkeley + Department of Statistics, University of California, Berkeley; Department of Electrical Engineering and Computer Sciences, University of California, Berkeley + Ragon Institute of MGH, MIT and Harvard + Chan-Zuckerberg Biohub", "aff_domain": "berkeley.edu;berkeley.edu;cs.berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;cs.berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9a96a2c73c0d477ff2a6da3bf538f4f4-Abstract.html", "aff_unique_index": "0;0;0+0;0+1+2", "aff_unique_norm": "University of California, Berkeley;Ragon Institute;Chan-Zuckerberg Biohub", "aff_unique_dep": "Department of Electrical Engineering and Computer Sciences;Institute of MGH, MIT and Harvard;", "aff_unique_url": "https://www.berkeley.edu;https://www.ragoninstitute.org;https://www.chanzuckerberg.com/biohub", "aff_unique_abbr": "UC Berkeley;Ragon;", "aff_campus_unique_index": "0;0;0+0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0+0;0+0+0", "aff_country_unique": "United States" }, { "title": "Information-based Adaptive Stimulus Selection to Optimize Communication Efficiency in Brain-Computer Interfaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11473", "id": "11473", "author_site": "Boyla Mainsah, Dmitry Kalika, Leslie Collins, Siyuan Liu, Chandra Throckmorton", "author": "Boyla Mainsah; Dmitry Kalika; Leslie Collins; Siyuan Liu; Chandra Throckmorton", "abstract": "Stimulus-driven brain-computer interfaces (BCIs), such as the P300 speller, rely on using a sequence of sensory stimuli to elicit specific neural responses as control signals, while a user attends to relevant target stimuli that occur within the sequence. In current BCIs, the stimulus presentation schedule is typically generated in a pseudo-random fashion. 
Given the non-stationarity of brain electrical signals, a better strategy could be to adapt the stimulus presentation schedule in real-time by selecting the optimal stimuli that will maximize the signal-to-noise ratios of the elicited neural responses and provide the most information about the user's intent based on the uncertainties of the data being measured. However, the high-dimensional stimulus space limits the development of algorithms with tractable solutions for optimized stimulus selection to allow for real-time decision-making within the stringent time requirements of BCI processing. We derive a simple analytical solution of an information-based objective function for BCI stimulus selection by transforming the high-dimensional stimulus space into a one-dimensional space that parameterizes the objective function - the prior probability mass of the stimulus under consideration, irrespective of its contents. We demonstrate the utility of our adaptive stimulus selection algorithm in improving BCI performance with results from simulation and real-time human experiments.", "bibtex": "@inproceedings{NEURIPS2018_a3eb043e,\n author = {Mainsah, Boyla and Kalika, Dmitry and Collins, Leslie and Liu, Siyuan and Throckmorton, Chandra},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Information-based Adaptive Stimulus Selection to Optimize Communication Efficiency in Brain-Computer Interfaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a3eb043e7bf775de87763e9f8121c953-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a3eb043e7bf775de87763e9f8121c953-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a3eb043e7bf775de87763e9f8121c953-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a3eb043e7bf775de87763e9f8121c953-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a3eb043e7bf775de87763e9f8121c953-Reviews.html", "metareview": "", "pdf_size": 674813, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16704067342918745550&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA; Johns Hopkins University Applied Physics Laboratory, Laurel, MD, USA; Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA; Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA; Department of Electrical and Computer Engineering, Duke University, Durham, NC, USA", "aff_domain": "duke.edu; ;duke.edu; ; ", "email": "duke.edu; ;duke.edu; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a3eb043e7bf775de87763e9f8121c953-Abstract.html", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Duke University;Johns Hopkins University", "aff_unique_dep": "Department of Electrical and Computer Engineering;Applied Physics Laboratory", "aff_unique_url": "https://www.duke.edu;https://www.jhuapl.edu", "aff_unique_abbr": "Duke;JHU APL", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Durham;Laurel", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Information-theoretic Limits for Community 
Detection in Network Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11796", "id": "11796", "author_site": "Chuyang Ke, Jean Honorio", "author": "Chuyang Ke; Jean Honorio", "abstract": "We analyze the information-theoretic limits for the recovery of node labels in several network models. This includes the Stochastic Block Model, the Exponential Random Graph Model, the Latent Space Model, the Directed Preferential Attachment Model, and the Directed Small-world Model. For the Stochastic Block Model, the non-recoverability condition depends on the probabilities of having edges inside a community, and between different communities. For the Latent Space Model, the non-recoverability condition depends on the dimension of the latent space, and on how far apart and how spread out the communities are in the latent space. For the Directed Preferential Attachment Model and the Directed Small-world Model, the non-recoverability condition depends on the ratio between homophily and neighborhood size. We also consider dynamic versions of the Stochastic Block Model and the Latent Space Model.", "bibtex": "@inproceedings{NEURIPS2018_1dba3025,\n author = {Ke, Chuyang and Honorio, Jean},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Information-theoretic Limits for Community Detection in Network Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1dba3025b159cd9354da65e2d0436a31-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1dba3025b159cd9354da65e2d0436a31-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1dba3025b159cd9354da65e2d0436a31-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1dba3025b159cd9354da65e2d0436a31-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1dba3025b159cd9354da65e2d0436a31-Reviews.html", "metareview": "", "pdf_size": 265605, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=381566066217426798&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, Purdue University; Department of Computer Science, Purdue University", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1dba3025b159cd9354da65e2d0436a31-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Informative Features for Model Comparison", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11103", "id": "11103", "author_site": "Wittawat Jitkrittum, Heishiro Kanagawa, Patsorn Sangkloy, James Hays, Bernhard Sch\u00f6lkopf, Arthur Gretton", "author": "Wittawat Jitkrittum; Heishiro Kanagawa; Patsorn Sangkloy; James Hays; Bernhard Sch\u00f6lkopf; Arthur Gretton", "abstract": "Given two candidate models, and a set of target observations, we address the problem of measuring the relative goodness of fit of the two models.
We propose two new statistical tests which are nonparametric, computationally efficient (runtime complexity is linear in the sample size), and interpretable. As a unique advantage, our tests can produce a set of examples (informative features) indicating the regions in the data domain where one model fits significantly better than the other. In a real-world problem of comparing GAN models, the test power of our new test matches that of the state-of-the-art test of relative goodness of fit, while being one order of magnitude faster.", "bibtex": "@inproceedings{NEURIPS2018_550a141f,\n author = {Jitkrittum, Wittawat and Kanagawa, Heishiro and Sangkloy, Patsorn and Hays, James and Sch\\\"{o}lkopf, Bernhard and Gretton, Arthur},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Informative Features for Model Comparison},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/550a141f12de6341fba65b0ad0433500-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/550a141f12de6341fba65b0ad0433500-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/550a141f12de6341fba65b0ad0433500-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/550a141f12de6341fba65b0ad0433500-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/550a141f12de6341fba65b0ad0433500-Reviews.html", "metareview": "", "pdf_size": 2444828, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=962836959160034441&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Max Planck Institute for Intelligent Systems; Gatsby Unit, UCL; Georgia Institute of Technology; Georgia Institute of Technology; Max Planck Institute for Intelligent Systems; Gatsby Unit, UCL", "aff_domain": "tuebingen.mpg.de;gatsby.ucl.ac.uk;gatech.edu;gatech.edu;tuebingen.mpg.de;gmail.com", "email": "tuebingen.mpg.de;gatsby.ucl.ac.uk;gatech.edu;gatech.edu;tuebingen.mpg.de;gmail.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/550a141f12de6341fba65b0ad0433500-Abstract.html", "aff_unique_index": "0;1;2;2;0;1", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University College London;Georgia Institute of Technology", "aff_unique_dep": "Intelligent Systems;Gatsby Unit;", "aff_unique_url": "https://www.mpi-is.mpg.de;https://www.ucl.ac.uk;https://www.gatech.edu", "aff_unique_abbr": "MPI-IS;UCL;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0;1", "aff_country_unique": "Germany;United Kingdom;United States" }, { "title": "Insights on representational similarity in neural networks with canonical correlation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11558", "id": "11558", "author_site": "Ari Morcos, Maithra Raghu, Samy Bengio", "author": "Ari Morcos; Maithra Raghu; Samy Bengio", "abstract": "Comparing different neural network representations and determining how representations evolve over time remain challenging open questions in our understanding of the function of neural networks. 
Comparing representations in neural networks is fundamentally difficult as the structure of representations varies greatly, even across groups of networks trained on identical tasks, and over the course of training. Here, we develop projection weighted CCA (Canonical Correlation Analysis) as a tool for understanding neural networks, building off of SVCCA, a recently proposed method (Raghu et al, 2017). We first improve the core method, showing how to differentiate between signal and noise, and then apply this technique to compare across a group of CNNs, demonstrating that networks which generalize converge to more similar representations than networks which memorize, that wider networks converge to more similar solutions than narrow networks, and that trained networks with identical topology but different learning rates converge to distinct clusters with diverse representations. We also investigate the representational dynamics of RNNs, across both training and sequential timesteps, finding that RNNs converge in a bottom-up pattern over the course of training and that the hidden state is highly variable over the course of a sequence, even when accounting for linear transforms. Together, these results provide new insights into the function of CNNs and RNNs, and demonstrate the utility of using CCA to understand representations.", "bibtex": "@inproceedings{NEURIPS2018_a7a3d70c,\n author = {Morcos, Ari and Raghu, Maithra and Bengio, Samy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Insights on representational similarity in neural networks with canonical correlation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a7a3d70c6d17a73140918996d03c014f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a7a3d70c6d17a73140918996d03c014f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a7a3d70c6d17a73140918996d03c014f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a7a3d70c6d17a73140918996d03c014f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a7a3d70c6d17a73140918996d03c014f-Reviews.html", "metareview": "", "pdf_size": 1287995, "gs_citation": 507, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15689105000424764079&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "DeepMind\u2021; Google Brain+Cornell University\u2021; Google Brain", "aff_domain": "gmail.com;gmail.com;google.com", "email": "gmail.com;gmail.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a7a3d70c6d17a73140918996d03c014f-Abstract.html", "aff_unique_index": "0;1+2;1", "aff_unique_norm": "DeepMind;Google;Cornell University", "aff_unique_dep": ";Google Brain;", "aff_unique_url": "https://deepmind.com;https://brain.google.com;https://www.cornell.edu", "aff_unique_abbr": "DeepMind;Google Brain;Cornell", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1+1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Integrated accounts of behavioral and neuroimaging data using flexible recurrent neural network models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11419", "id": "11419", 
"author_site": "Amir Dezfouli, Richard Morris, Fabio Ramos, Peter Dayan, Bernard Balleine", "author": "Amir Dezfouli; Richard Morris; Fabio T Ramos; Peter Dayan; Bernard Balleine", "abstract": "Neuroscience studies of human decision-making abilities commonly involve\nsubjects completing a decision-making task while BOLD signals are\nrecorded using fMRI. Hypotheses are tested about which brain regions\nmediate the effect of past experience, such as rewards, on future\nactions. One standard approach to this is model-based fMRI data\nanalysis, in which a model is fitted to the behavioral data, i.e., a\nsubject's choices, and then the neural data are parsed to find brain\nregions whose BOLD signals are related to the model's internal\nsignals. However, the internal mechanics of such purely behavioral\nmodels are not constrained by the neural data, and therefore might miss\nor mischaracterize aspects of the brain. To address this limitation, we\nintroduce a new method using recurrent neural network models that are\nflexible enough to be jointly fitted to the behavioral and neural\ndata. We trained a model so that its internal states were suitably\nrelated to neural activity during the task, while at the same time its\noutput predicted the next action a subject would execute. We then used\nthe fitted model to create a novel visualization of the relationship\nbetween the activity in brain regions at different times following a\nreward and the choices the subject subsequently made. Finally, we\nvalidated our method using a previously published dataset. We found that\nthe model was able to recover the underlying neural substrates that were\ndiscovered by explicit model engineering in the previous work, and also\nderived new results regarding the temporal pattern of brain activity.", "bibtex": "@inproceedings{NEURIPS2018_819e3d6c,\n author = {Dezfouli, Amir and Morris, Richard and Ramos, Fabio T and Dayan, Peter and Balleine, Bernard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Integrated accounts of behavioral and neuroimaging data using flexible recurrent neural network models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/819e3d6c1381eac87c17617e5165f38c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/819e3d6c1381eac87c17617e5165f38c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/819e3d6c1381eac87c17617e5165f38c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/819e3d6c1381eac87c17617e5165f38c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/819e3d6c1381eac87c17617e5165f38c-Reviews.html", "metareview": "", "pdf_size": 6383314, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8131734559793653463&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "UNSW Sydney+Data61, CSIRO; University of Sydney; University of Sydney; Gatsby Unit, UCL; UNSW Sydney", "aff_domain": "gmail.com;gmail.com;sydney.edu.au;ucl.ac.uk;unsw.edu.au", "email": "gmail.com;gmail.com;sydney.edu.au;ucl.ac.uk;unsw.edu.au", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/819e3d6c1381eac87c17617e5165f38c-Abstract.html", "aff_unique_index": "0+1;2;2;3;0", "aff_unique_norm": "University of New South Wales;CSIRO;University of Sydney;University College London", "aff_unique_dep": ";Data61;;Gatsby Unit", "aff_unique_url": "https://www.unsw.edu.au;https://www.csiro.au;https://www.sydney.edu.au;https://www.ucl.ac.uk", "aff_unique_abbr": "UNSW;CSIRO;USYD;UCL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Sydney;", "aff_country_unique_index": "0+0;0;0;1;0", "aff_country_unique": "Australia;United Kingdom" }, { "title": "Interactive Structure Learning with Structural Query-by-Committee", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11131", "id": "11131", "author_site": "Christopher Tosh, Sanjoy Dasgupta", "author": "Christopher Tosh; Sanjoy Dasgupta", "abstract": "In this work, we introduce interactive structure learning, a framework that unifies many different interactive learning tasks. We present a generalization of the query-by-committee active learning algorithm for this setting, and we study its consistency and rate of convergence, both theoretically and empirically, with and without noise.", "bibtex": "@inproceedings{NEURIPS2018_08c5433a,\n author = {Tosh, Christopher and Dasgupta, Sanjoy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
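For reference, a minimal sketch of the classical query-by-committee selection rule that the structural QBC algorithm above generalizes; the sklearn-style `.predict` interface and vote-entropy scoring are assumptions, not the paper's construction.

```python
import numpy as np

def vote_entropy(committee, x):
    # committee: fitted classifiers with sklearn-style .predict
    votes = np.array([clf.predict(x.reshape(1, -1))[0] for clf in committee])
    _, counts = np.unique(votes, return_counts=True)
    p = counts / counts.sum()
    return float(-(p * np.log(p)).sum())  # 0 = full agreement

def next_query(committee, pool):
    # query the unlabeled point the committee disagrees on the most
    return int(np.argmax([vote_entropy(committee, x) for x in pool]))
```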
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interactive Structure Learning with Structural Query-by-Committee},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08c5433a60135c32e34f46a71175850c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08c5433a60135c32e34f46a71175850c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08c5433a60135c32e34f46a71175850c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08c5433a60135c32e34f46a71175850c-Reviews.html", "metareview": "", "pdf_size": 400137, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13961476779527479919&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Columbia University; UC San Diego", "aff_domain": "columbia.edu;cs.ucsd.edu", "email": "columbia.edu;cs.ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08c5433a60135c32e34f46a71175850c-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.ucsd.edu", "aff_unique_abbr": "Columbia;UCSD", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Interpreting Neural Network Judgments via Minimal, Stable, and Symbolic Corrections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11478", "id": "11478", "author_site": "Xin Zhang, Armando Solar-Lezama, Rishabh Singh", "author": "Xin Zhang; Armando Solar-Lezama; Rishabh Singh", "abstract": "We present a new algorithm to generate minimal, stable, and symbolic corrections to an input that will cause a neural network with ReLU activations to change its output. We argue that such a correction is a useful way to provide feedback to a user when the network's output is different from a desired output. Our algorithm generates such a correction by solving a series of linear constraint satisfaction problems. The technique is evaluated on three neural network models: one predicting whether an applicant will pay a mortgage, one predicting whether a first-order theorem can be proved efficiently by a solver using certain heuristics, and the final one judging whether a drawing is an accurate rendition of a canonical drawing of a cat.", "bibtex": "@inproceedings{NEURIPS2018_300891a6,\n author = {Zhang, Xin and Solar-Lezama, Armando and Singh, Rishabh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
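Since a ReLU network restricted to one activation pattern is affine in its input, searching for a minimal correction within that region reduces to a linear program, which is the flavor of constraint problem the abstract above describes. A hedged scipy sketch, where (w, b) stand for the affine logit of the fixed region at the current input:

```python
import numpy as np
from scipy.optimize import linprog

def minimal_l1_correction(w, b, x, tau=0.0):
    """Smallest L1 change d to x so that w.(x+d) + b >= tau.

    Models one linear region of a ReLU network, where the logit is
    affine in the input; (w, b) would come from the active-neuron
    pattern at x. Variables are [d (signed change), t (|d| bounds)].
    """
    n = len(x)
    c = np.concatenate([np.zeros(n), np.ones(n)])      # minimize sum(t)
    # -d - t <= 0 and d - t <= 0 encode |d| <= t
    A_ub = np.block([[-np.eye(n), -np.eye(n)],
                     [ np.eye(n), -np.eye(n)]])
    b_ub = np.zeros(2 * n)
    # -w.d <= w.x + b - tau  encodes  w.(x+d) + b >= tau
    A_ub = np.vstack([A_ub, np.concatenate([-w, np.zeros(n)])])
    b_ub = np.append(b_ub, w @ x + b - tau)
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(None, None)] * (2 * n))
    return res.x[:n] if res.success else None
```

Minimizing the L1 norm tends to change few input features, which is what makes the correction readable as user feedback.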
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interpreting Neural Network Judgments via Minimal, Stable, and Symbolic Corrections},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/300891a62162b960cf02ce3827bb363c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/300891a62162b960cf02ce3827bb363c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/300891a62162b960cf02ce3827bb363c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/300891a62162b960cf02ce3827bb363c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/300891a62162b960cf02ce3827bb363c-Reviews.html", "metareview": "", "pdf_size": 3855989, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4426611026213242702&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "CSAIL, MIT; CSAIL, MIT; Google Brain", "aff_domain": "csail.mit.edu;csail.mit.edu;google.com", "email": "csail.mit.edu;csail.mit.edu;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/300891a62162b960cf02ce3827bb363c-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;Google Brain", "aff_unique_url": "https://www.csail.mit.edu;https://brain.google.com", "aff_unique_abbr": "MIT;Google Brain", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "IntroVAE: Introspective Variational Autoencoders for Photographic Image Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11033", "id": "11033", "author_site": "Huaibo Huang, zhihang li, Ran He, Zhenan Sun, Tieniu Tan", "author": "Huaibo Huang; zhihang li; Ran He; Zhenan Sun; Tieniu Tan", "abstract": "We present a novel introspective variational autoencoder (IntroVAE) model for synthesizing high-resolution photographic images. IntroVAE is capable of self-evaluating the quality of its generated samples and improving itself accordingly. Its inference and generator models are jointly trained in an introspective way. On one hand, the generator is required to reconstruct the input images from the noisy outputs of the inference model as in normal VAEs. On the other hand, the inference model is encouraged to discriminate between the generated and real samples while the generator tries to fool it as GANs. These two famous generative frameworks are integrated in a simple yet efficient single-stream architecture that can be trained in a single stage. IntroVAE preserves the advantages of VAEs, such as stable training and nice latent manifold. Unlike most other hybrid models of VAEs and GANs, IntroVAE requires no extra discriminators, because the inference model itself serves as a discriminator to distinguish between the generated and real samples. Experiments demonstrate that our method produces high-resolution photo-realistic images (e.g., CELEBA images at $1024^{2}$), which are comparable to or better than the state-of-the-art GANs.", "bibtex": "@inproceedings{NEURIPS2018_093f65e0,\n author = {Huang, Huaibo and li, zhihang and He, Ran and Sun, Zhenan and Tan, Tieniu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H.
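A rough sketch of how the introspective objectives in the IntroVAE abstract could look: the inference model plays discriminator through its KL term (pushed above a margin m on generated samples), while the generator tries to keep that KL small. The `enc`/`dec` signatures, weights, and exact term grouping are assumptions; consult the paper for the real objective.

```python
import torch
import torch.nn.functional as F

def kl_to_prior(mu, logvar):
    # KL( N(mu, diag(exp(logvar))) || N(0, I) ), per sample
    return 0.5 * torch.sum(mu ** 2 + logvar.exp() - 1.0 - logvar, dim=1)

def introvae_losses(enc, dec, x, m=10.0, alpha=0.25, beta=1.0):
    # enc(x) -> (mu, logvar); dec(z) -> image batch (assumed signatures)
    mu, logvar = enc(x)
    z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()
    recon = F.mse_loss(dec(z), x, reduction="none").flatten(1).sum(1)

    z_p = torch.randn_like(mu)                       # prior samples
    kl_fake = kl_to_prior(*enc(dec(z_p).detach()))
    # Inference model: low KL on real data, KL above margin m on fakes.
    loss_enc = (kl_to_prior(mu, logvar) + alpha * F.relu(m - kl_fake)
                + beta * recon).mean()
    # Generator: make the inference model assign low KL to its samples.
    loss_gen = (alpha * kl_to_prior(*enc(dec(z_p))) + beta * recon).mean()
    return loss_enc, loss_gen
```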
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {IntroVAE: Introspective Variational Autoencoders for Photographic Image Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/093f65e080a295f8076b1c5722a46aa2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/093f65e080a295f8076b1c5722a46aa2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/093f65e080a295f8076b1c5722a46aa2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/093f65e080a295f8076b1c5722a46aa2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/093f65e080a295f8076b1c5722a46aa2-Reviews.html", "metareview": "", "pdf_size": 6216293, "gs_citation": 356, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4651969287055999365&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/093f65e080a295f8076b1c5722a46aa2-Abstract.html" }, { "title": "Invariant Representations without Adversarial Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11866", "id": "11866", "author_site": "Daniel Moyer, Shuyang Gao, Rob Brekelmans, Aram Galstyan, Greg Ver Steeg", "author": "Daniel Moyer; Shuyang Gao; Rob Brekelmans; Aram Galstyan; Greg Ver Steeg", "abstract": "Representations of data that are invariant to changes in specified factors are useful for a wide range of problems: removing potential biases in prediction problems, controlling the effects of covariates, and disentangling meaningful factors of variation. Unfortunately, learning representations that exhibit invariance to arbitrary nuisance factors yet remain useful for other tasks is challenging. Existing approaches cast the trade-off between task performance and invariance in an adversarial way, using an iterative minimax optimization. We show that adversarial training is unnecessary and sometimes counter-productive; we instead cast invariant representation learning as a single information-theoretic objective that can be directly optimized. We demonstrate that this approach matches or exceeds performance of state-of-the-art adversarial approaches for learning fair representations and for generative modeling with controllable transformations.", "bibtex": "@inproceedings{NEURIPS2018_415185ea,\n author = {Moyer, Daniel and Gao, Shuyang and Brekelmans, Rob and Galstyan, Aram and Ver Steeg, Greg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
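The single information-theoretic objective described above can be read as a conditional VAE loss plus a penalty bounding the latent's information content. A hedged torch sketch, approximating the aggregate posterior q(z) by the batch mixture of posteriors (a common trick, not necessarily the paper's estimator); `enc`/`dec` signatures are assumptions.

```python
import math
import torch
import torch.nn.functional as F

def log_gauss(z, mu, logvar):
    # log N(z; mu, diag(exp(logvar))), summed over latent dimensions
    return -0.5 * (math.log(2 * math.pi) + logvar
                   + (z - mu) ** 2 / logvar.exp()).sum(-1)

def invariant_loss(enc, dec, x, c, beta=1.0, lam=1.0):
    # enc(x) -> (mu, logvar); dec(z, c) -> reconstruction (assumed)
    mu, logvar = enc(x)
    z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()
    recon = F.mse_loss(dec(z, c), x)          # p(x|z,c): decoder sees c
    kl_prior = 0.5 * (mu ** 2 + logvar.exp() - 1 - logvar).sum(1).mean()
    # E_x KL(q(z|x) || q(z)), with q(z) ~ batch mixture of posteriors
    log_qzx = log_gauss(z, mu, logvar)
    pairwise = log_gauss(z.unsqueeze(1), mu.unsqueeze(0), logvar.unsqueeze(0))
    log_qz = torch.logsumexp(pairwise, dim=1) - math.log(len(x))
    return recon + beta * kl_prior + lam * (log_qzx - log_qz).mean()
```

Because the decoder receives c directly, the encoder has no incentive to keep information about c in z, which is the intuition for why no adversary is needed.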
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Invariant Representations without Adversarial Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/415185ea244ea2b2bedeb0449b926802-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/415185ea244ea2b2bedeb0449b926802-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/415185ea244ea2b2bedeb0449b926802-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/415185ea244ea2b2bedeb0449b926802-Reviews.html", "metareview": "", "pdf_size": 572783, "gs_citation": 264, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8222665612551093103&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Information Sciences Institute; Information Sciences Institute; Information Sciences Institute; Information Sciences Institute; Information Sciences Institute", "aff_domain": "usc.edu;usc.edu;usc.edu;isi.edu;isi.edu", "email": "usc.edu;usc.edu;usc.edu;isi.edu;isi.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/415185ea244ea2b2bedeb0449b926802-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "Information Sciences Institute", "aff_unique_url": "https://isi.usc.edu", "aff_unique_abbr": "ISI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Invertibility of Convolutional Generative Networks from Partial Measurements", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11915", "id": "11915", "author_site": "Fangchang Ma, Ulas Ayaz, Sertac Karaman", "author": "Fangchang Ma; Ulas Ayaz; Sertac Karaman", "abstract": "In this work, we present new theoretical results on convolutional generative neural networks, in particular their invertibility (i.e., the recovery of input latent code given the network output). The study of the network inversion problem is motivated by image inpainting and the mode collapse problem in training GANs. Network inversion is highly non-convex, and thus is typically computationally intractable and without optimality guarantees. However, we rigorously prove that, under some mild technical assumptions, the input of a two-layer convolutional generative network can be deduced from the network output efficiently using simple gradient descent. This new theoretical finding implies that the mapping from the low-dimensional latent space to the high-dimensional image space is bijective (i.e., one-to-one). In addition, the same conclusion holds even when the network output is only partially observed (i.e., with missing pixels). Our theorems hold for a two-layer convolutional generative network with ReLU as the activation function, but we demonstrate empirically that the same conclusion extends to multi-layer networks and networks with other activation functions, including the leaky ReLU, sigmoid and tanh.", "bibtex": "@inproceedings{NEURIPS2018_e0ae4561,\n author = {Ma, Fangchang and Ayaz, Ulas and Karaman, Sertac},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
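The inversion procedure the abstract above analyzes is essentially gradient descent on a masked reconstruction error. A minimal torch sketch, where `G` is any generator and `mask` encodes which pixels were observed; names and hyperparameters are placeholders.

```python
import torch

def invert_generator(G, y_obs, mask, latent_dim, steps=2000, lr=0.05):
    """Recover a latent z from partial observations y_obs = mask * G(z*).

    Plain gradient descent on the masked reconstruction error, the kind
    of procedure whose convergence the paper studies for two-layer ReLU
    generators.
    """
    z = torch.zeros(1, latent_dim, requires_grad=True)
    opt = torch.optim.SGD([z], lr=lr)
    for _ in range(steps):
        opt.zero_grad()
        loss = ((mask * (G(z) - y_obs)) ** 2).sum()
        loss.backward()
        opt.step()
    return z.detach()
```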
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Invertibility of Convolutional Generative Networks from Partial Measurements},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e0ae4561193dbf6e4cf7e8f4006948e3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e0ae4561193dbf6e4cf7e8f4006948e3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e0ae4561193dbf6e4cf7e8f4006948e3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e0ae4561193dbf6e4cf7e8f4006948e3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e0ae4561193dbf6e4cf7e8f4006948e3-Reviews.html", "metareview": "", "pdf_size": 1763852, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13691072756611951369&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e0ae4561193dbf6e4cf7e8f4006948e3-Abstract.html" }, { "title": "Is Q-Learning Provably Efficient?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11477", "id": "11477", "author_site": "Chi Jin, Zeyuan Allen-Zhu, Sebastien Bubeck, Michael Jordan", "author": "Chi Jin; Zeyuan Allen-Zhu; Sebastien Bubeck; Michael I Jordan", "abstract": "Model-free reinforcement learning (RL) algorithms directly parameterize and update value functions or policies, bypassing the modeling of the environment. They are typically simpler, more flexible to use, and thus more prevalent in modern deep RL than model-based approaches. However, empirical work has suggested that they require large numbers of samples to learn. The theoretical question of whether or not model-free algorithms are in fact \\emph{sample efficient} is one of the most fundamental questions in RL. The problem is unsolved even in the basic scenario with finitely many states and actions. We prove that, in an episodic MDP setting, Q-learning with UCB exploration achieves regret $\\tilde{O}(\\sqrt{H^3 SAT})$ where $S$ and $A$ are the numbers of states and actions, $H$ is the number of steps per episode, and $T$ is the total number of steps. Our regret matches the optimal regret up to a single $\\sqrt{H}$ factor. Thus we establish the sample efficiency of a classical model-free approach. Moreover, to the best of our knowledge, this is the first model-free analysis to establish $\\sqrt{T}$ regret \\emph{without} requiring access to a ``simulator.''", "bibtex": "@inproceedings{NEURIPS2018_d3b1fb02,\n author = {Jin, Chi and Allen-Zhu, Zeyuan and Bubeck, Sebastien and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
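A compact sketch of tabular episodic Q-learning with a UCB-Hoeffding bonus, the algorithm family analyzed in the abstract above. The `env.reset`/`env.step` interface is a hypothetical toy, and the bonus constant omits the log factors in the paper's analysis.

```python
import numpy as np

def ucb_q_learning(env, S, A, H, K, c=1.0):
    # env.reset() -> s0; env.step(h, s, a) -> (s_next, reward): toy interface
    Q = np.full((H, S, A), float(H))          # optimistic initialization
    N = np.zeros((H, S, A), dtype=int)
    for _ in range(K):                        # K episodes
        s = env.reset()
        for h in range(H):
            a = int(np.argmax(Q[h, s]))
            s_next, r = env.step(h, s, a)
            N[h, s, a] += 1
            t = N[h, s, a]
            alpha = (H + 1) / (H + t)         # the paper's learning rate
            bonus = c * np.sqrt(H ** 3 / t)   # UCB bonus, log factors omitted
            v_next = Q[h + 1, s_next].max() if h + 1 < H else 0.0
            Q[h, s, a] = min((1 - alpha) * Q[h, s, a]
                             + alpha * (r + v_next + bonus), float(H))
            s = s_next
    return Q
```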
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Is Q-Learning Provably Efficient?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d3b1fb02964aa64e257f9f26a31f72cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d3b1fb02964aa64e257f9f26a31f72cf-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d3b1fb02964aa64e257f9f26a31f72cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d3b1fb02964aa64e257f9f26a31f72cf-Reviews.html", "metareview": "", "pdf_size": 379686, "gs_citation": 1060, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12278582662266269452&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; Microsoft Research, Redmond + University of California, Berkeley; Microsoft Research, Redmond; University of California, Berkeley", "aff_domain": "cs.berkeley.edu;csail.mit.edu;microsoft.com;cs.berkeley.edu", "email": "cs.berkeley.edu;csail.mit.edu;microsoft.com;cs.berkeley.edu", "github": "", "project": "https://arxiv.org/abs/1807.03765", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d3b1fb02964aa64e257f9f26a31f72cf-Abstract.html", "aff_unique_index": "0;1+0;1;0", "aff_unique_norm": "University of California, Berkeley;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UC Berkeley;MSR", "aff_campus_unique_index": "0;1+0;1;0", "aff_campus_unique": "Berkeley;Redmond", "aff_country_unique_index": "0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Isolating Sources of Disentanglement in Variational Autoencoders", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11269", "id": "11269", "author_site": "Tian Qi Chen, Xuechen (Chen) Li, Roger Grosse, David Duvenaud", "author": "Ricky T. Q. Chen; Xuechen Li; Roger B Grosse; David K. Duvenaud", "abstract": "We decompose the evidence lower bound to show the existence of a term measuring the total correlation between latent variables. We use this to motivate the beta-TCVAE (Total Correlation Variational Autoencoder) algorithm, a refinement and plug-in replacement of the beta-VAE for learning disentangled representations, requiring no additional hyperparameters during training. We further propose a principled classifier-free measure of disentanglement called the mutual information gap (MIG). We perform extensive quantitative and qualitative experiments, in both restricted and non-restricted settings, and show a strong relation between total correlation and disentanglement, when the model is trained using our framework.", "bibtex": "@inproceedings{NEURIPS2018_1ee3dfcd,\n author = {Chen, Ricky T. Q. and Li, Xuechen and Grosse, Roger B and Duvenaud, David K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
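The mutual information gap (MIG) proposed in the abstract above is straightforward to compute once latents and factors are discretized. A sketch using sklearn's discrete MI estimator; binning choices and normalization details are assumptions.

```python
import numpy as np
from sklearn.metrics import mutual_info_score

def mig(latents, factors):
    # latents: (n, K) discretized codes; factors: (n, M) ground-truth values
    gaps = []
    for m in range(factors.shape[1]):
        v = factors[:, m]
        mi = np.array([mutual_info_score(v, latents[:, k])
                       for k in range(latents.shape[1])])
        _, counts = np.unique(v, return_counts=True)
        p = counts / counts.sum()
        h_v = -(p * np.log(p)).sum()          # factor entropy (nats)
        top = np.sort(mi)[::-1]
        gaps.append((top[0] - top[1]) / h_v)  # gap between best two latents
    return float(np.mean(gaps))
```

A large gap means each factor is captured by a single latent dimension, which is the disentanglement notion the metric rewards.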
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Isolating Sources of Disentanglement in Variational Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1ee3dfcd8a0645a25a35977997223d22-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1ee3dfcd8a0645a25a35977997223d22-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1ee3dfcd8a0645a25a35977997223d22-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1ee3dfcd8a0645a25a35977997223d22-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1ee3dfcd8a0645a25a35977997223d22-Reviews.html", "metareview": "", "pdf_size": 962415, "gs_citation": 1619, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11372263911361899725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1ee3dfcd8a0645a25a35977997223d22-Abstract.html" }, { "title": "Iterative Value-Aware Model Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11865", "id": "11865", "author": "Amir-massoud Farahmand", "abstract": "This paper introduces a model-based reinforcement learning (MBRL) framework that incorporates the underlying decision problem in learning the transition model of the environment. This is in contrast with conventional approaches to MBRL that learn the model of the environment, for example by finding the maximum likelihood estimate, without taking into account the decision problem. Value-Aware Model Learning (VAML) framework argues that this might not be a good idea, especially if the true model of the environment does not belong to the model class from which we are estimating the model. The original VAML framework, however, may result in an optimization problem that is difficult to solve. This paper introduces a new MBRL class of algorithms, called Iterative VAML, that benefits from the structure of how the planning is performed (i.e., through approximate value iteration) to devise a simpler optimization problem. The paper theoretically analyzes Iterative VAML and provides finite sample error upper bound guarantee for it.", "bibtex": "@inproceedings{NEURIPS2018_7a2347d9,\n author = {Farahmand, Amir-massoud},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
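The value-aware model loss at the heart of IterVAML, as described above, penalizes a candidate model only for errors in the expected next-state value under the current value estimate, not for likelihood. A tabular numpy sketch; the array-based interface is an illustration, not the paper's code.

```python
import numpy as np

def value_aware_model_loss(P_hat, V, transitions):
    """IterVAML-style model loss for a tabular model (sketch).

    P_hat: (S, A, S) candidate transition model; V: (S,) value estimate
    from the last approximate value-iteration step; transitions: list of
    observed (s, a, s_next) tuples.
    """
    err = 0.0
    for s, a, s_next in transitions:
        predicted = P_hat[s, a] @ V        # E_model[ V(s') ]
        err += (predicted - V[s_next]) ** 2
    return err / len(transitions)
```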
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Iterative Value-Aware Model Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7a2347d96752880e3d58d72e9813cc14-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7a2347d96752880e3d58d72e9813cc14-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7a2347d96752880e3d58d72e9813cc14-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7a2347d96752880e3d58d72e9813cc14-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7a2347d96752880e3d58d72e9813cc14-Reviews.html", "metareview": "", "pdf_size": 435326, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3485846304051317616&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Vector Institute, Toronto, Canada + Mitsubishi Electric Research Laboratories, Cambridge, USA", "aff_domain": "vectorinstitute.ai", "email": "vectorinstitute.ai", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7a2347d96752880e3d58d72e9813cc14-Abstract.html", "aff_unique_index": "0+1", "aff_unique_norm": "Vector Institute;Mitsubishi Electric Research Laboratories", "aff_unique_dep": ";", "aff_unique_url": "https://vectorinstitute.ai;https://www.merl.com", "aff_unique_abbr": "Vector Institute;MERL", "aff_campus_unique_index": "0+1", "aff_campus_unique": "Toronto;Cambridge", "aff_country_unique_index": "0+1", "aff_country_unique": "Canada;United States" }, { "title": "Joint Active Feature Acquisition and Classification with Variable-Size Set Encoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11153", "id": "11153", "author_site": "Hajin Shim, Sung Ju Hwang, Eunho Yang", "author": "Hajin Shim; Sung Ju Hwang; Eunho Yang", "abstract": "We consider the problem of active feature acquisition where the goal is to sequentially select the subset of features in order to achieve the maximum prediction performance in the most cost-effective way at test time. In this work, we formulate this active feature acquisition as a jointly learning problem of training both the classifier (environment) and the RL agent that decides either to", "bibtex": "@inproceedings{NEURIPS2018_e5841df2,\n author = {Shim, Hajin and Hwang, Sung Ju and Yang, Eunho},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint Active Feature Acquisition and Classification with Variable-Size Set Encoding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e5841df2166dd424a57127423d276bbe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e5841df2166dd424a57127423d276bbe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e5841df2166dd424a57127423d276bbe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e5841df2166dd424a57127423d276bbe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e5841df2166dd424a57127423d276bbe-Reviews.html", "metareview": "", "pdf_size": 1081833, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3266370785445387023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "KAIST1; KAIST1 + AItrics2; KAIST1 + AItrics2", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "email": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e5841df2166dd424a57127423d276bbe-Abstract.html", "aff_unique_index": "0;0+1;0+1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;AITRICS", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;", "aff_unique_abbr": "KAIST;AItrics", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea;" }, { "title": "Joint Autoregressive and Hierarchical Priors for Learned Image Compression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12018", "id": "12018", "author_site": "David Minnen, Johannes Ball\u00e9, Johannes Ball\u00e9, George D Toderici", "author": "David Minnen; Johannes Ball\u00e9; George D Toderici", "abstract": "Recent models for learned image compression are based on autoencoders that learn approximately invertible mappings from pixels to a quantized latent representation. The transforms are combined with an entropy model, which is a prior on the latent representation that can be used with standard arithmetic coding algorithms to generate a compressed bitstream. Recently, hierarchical entropy models were introduced as a way to exploit more structure in the latents than previous fully factorized priors, improving compression performance while maintaining end-to-end optimization. Inspired by the success of autoregressive priors in probabilistic generative models, we examine autoregressive, hierarchical, and combined priors as alternatives, weighing their costs and benefits in the context of image compression. While it is well known that autoregressive models can incur a significant computational penalty, we find that in terms of compression performance, autoregressive and hierarchical priors are complementary and can be combined to exploit the probabilistic structure in the latents better than all previous learned models. The combined model yields state-of-the-art rate-distortion performance and generates smaller files than existing methods: 15.8% rate reductions over the baseline hierarchical model and 59.8%, 35%, and 8.4% savings over JPEG, JPEG2000, and BPG, respectively. 
To the best of our knowledge, our model is the first learning-based method to outperform the top standard image codec (BPG) on both the PSNR and MS-SSIM distortion metrics.", "bibtex": "@inproceedings{NEURIPS2018_53edebc5,\n author = {Minnen, David and Ball\\'{e}, Johannes and Toderici, George D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint Autoregressive and Hierarchical Priors for Learned Image Compression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/53edebc543333dfbf7c5933af792c9c4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/53edebc543333dfbf7c5933af792c9c4-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/53edebc543333dfbf7c5933af792c9c4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/53edebc543333dfbf7c5933af792c9c4-Reviews.html", "metareview": "", "pdf_size": 1829825, "gs_citation": 1568, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7854870595961228643&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Google Research; Google Research; Google Research", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/53edebc543333dfbf7c5933af792c9c4-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Joint Sub-bands Learning with Clique Structures for Wavelet Domain Super-Resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11043", "id": "11043", "author_site": "Zhisheng Zhong, Tiancheng Shen, Yibo Yang, Zhouchen Lin, Chao Zhang", "author": "Zhisheng Zhong; Tiancheng Shen; Yibo Yang; Zhouchen Lin; Chao Zhang", "abstract": "Convolutional neural networks (CNNs) have recently achieved great success in single-image super-resolution (SISR). However, these methods tend to produce over-smoothed outputs and miss some textural details. To solve these problems, we propose the Super-Resolution CliqueNet (SRCliqueNet) to reconstruct the high resolution (HR) image with better textural details in the wavelet domain. The proposed SRCliqueNet firstly extracts a set of feature maps from the low resolution (LR) image by the clique blocks group. Then we send the set of feature maps to the clique up-sampling module to reconstruct the HR image. The clique up-sampling module consists of four sub-nets which predict the high resolution wavelet coefficients of four sub-bands. Since we consider the edge feature properties of four sub-bands, the four sub-nets are connected to the others so that they can learn the coefficients of four sub-bands jointly. Finally we apply inverse discrete wavelet transform (IDWT) to the output of four sub-nets at the end of the clique up-sampling module to increase the resolution and reconstruct the HR image. 
Extensive quantitative and qualitative experiments on benchmark datasets show that our method achieves superior performance over the state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_5f93f983,\n author = {Zhong, Zhisheng and Shen, Tiancheng and Yang, Yibo and Lin, Zhouchen and Zhang, Chao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint Sub-bands Learning with Clique Structures for Wavelet Domain Super-Resolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5f93f983524def3dca464469d2cf9f3e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5f93f983524def3dca464469d2cf9f3e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5f93f983524def3dca464469d2cf9f3e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5f93f983524def3dca464469d2cf9f3e-Reviews.html", "metareview": "", "pdf_size": 1969142, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10246803154910111085&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Key Laboratory of Machine Perception (MOE), School of EECS, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + Academy for Advanced Interdisciplinary Studies, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + Academy for Advanced Interdisciplinary Studies, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University + Cooperative Medianet Innovation Center, Shanghai Jiao Tong University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5f93f983524def3dca464469d2cf9f3e-Abstract.html", "aff_unique_index": "0;0+0;0+0;0;0+1", "aff_unique_norm": "Peking University;Shanghai Jiao Tong University", "aff_unique_dep": "School of EECS;Cooperative Medianet Innovation Center", "aff_unique_url": "http://www.pku.edu.cn;https://www.sjtu.edu.cn", "aff_unique_abbr": "PKU;SJTU", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0+0;0;0+0", "aff_country_unique": "China" }, { "title": "KDGAN: Knowledge Distillation with Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11100", "id": "11100", "author_site": "Xiaojie Wang, Rui Zhang, Yu Sun, Jianzhong Qi", "author": "Xiaojie Wang; Rui Zhang; Yu Sun; Jianzhong Qi", "abstract": "Knowledge distillation (KD) aims to train a lightweight classifier suitable to provide accurate inference with constrained resources in multi-label learning. Instead of directly consuming feature-label pairs, the classifier is trained by a teacher, i.e., a high-capacity model whose training may be resource-hungry. The accuracy of the classifier trained this way is usually suboptimal because it is difficult to learn the true data distribution from the teacher. 
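The final step of the SRCliqueNet pipeline described above, combining four predicted wavelet sub-bands via an inverse DWT, can be exercised with pywt; here the four sub-nets are replaced by plain arrays, and a round-trip check stands in for a real prediction.

```python
import numpy as np
import pywt

def reconstruct_from_subbands(ll, lh, hl, hh, wavelet="haar"):
    """Inverse DWT step: four sub-band arrays -> one HR image channel.

    In an SRCliqueNet-style pipeline the four arrays would be the
    outputs of the four coefficient sub-nets.
    """
    return pywt.idwt2((ll, (lh, hl, hh)), wavelet)

# Round-trip check on a random "image": DWT then IDWT recovers the input.
img = np.random.rand(64, 64)
ll, (lh, hl, hh) = pywt.dwt2(img, "haar")
assert np.allclose(reconstruct_from_subbands(ll, lh, hl, hh), img)
```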
An alternative method is to adversarially train the classifier against a discriminator in a two-player game akin to generative adversarial networks (GAN), which can ensure that the classifier learns the true data distribution at the equilibrium of this game. However, it may take an excessively long time for such a two-player game to reach equilibrium due to high-variance gradient updates. To address these limitations, we propose a three-player game named KDGAN consisting of a classifier, a teacher, and a discriminator. The classifier and the teacher learn from each other via distillation losses and are adversarially trained against the discriminator via adversarial losses. By simultaneously optimizing the distillation and adversarial losses, the classifier will learn the true data distribution at the equilibrium. We approximate the discrete distribution learned by the classifier (or the teacher) with a concrete distribution. From the concrete distribution, we generate continuous samples to obtain low-variance gradient updates, which speed up the training. Extensive experiments using real datasets confirm the superiority of KDGAN in both accuracy and training speed.", "bibtex": "@inproceedings{NEURIPS2018_019d385e,\n author = {Wang, Xiaojie and Zhang, Rui and Sun, Yu and Qi, Jianzhong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {KDGAN: Knowledge Distillation with Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/019d385eb67632a7e958e23f24bd07d7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/019d385eb67632a7e958e23f24bd07d7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/019d385eb67632a7e958e23f24bd07d7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/019d385eb67632a7e958e23f24bd07d7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/019d385eb67632a7e958e23f24bd07d7-Reviews.html", "metareview": "", "pdf_size": 722205, "gs_citation": 240, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17825569561503586591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Melbourne; University of Melbourne; Twitter Inc.; University of Melbourne", "aff_domain": "gmail.com;unimelb.edu.au;twitter.com;unimelb.edu.au", "email": "gmail.com;unimelb.edu.au;twitter.com;unimelb.edu.au", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/019d385eb67632a7e958e23f24bd07d7-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Melbourne;Twitter", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.twitter.com", "aff_unique_abbr": "UniMelb;Twitter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "KONG: Kernels for ordered-neighborhood graphs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11402", "id": "11402", "author_site": "Moez Draief, Konstantin Kutzkov, Kevin Scaman, Milan Vojnovic", "author": "Moez Draief; Konstantin Kutzkov; Kevin Scaman; Milan Vojnovic", "abstract": "We present novel graph kernels for graphs with node and edge labels that
have ordered neighborhoods, i.e. when neighbor nodes follow an order. Graphs with ordered neighborhoods are a natural data representation for evolving graphs where edges are created over time, which induces an order. Combining convolutional subgraph kernels and string kernels, we design new scalable algorithms for generation of explicit graph feature maps using sketching techniques. We obtain precise bounds for the approximation accuracy and computational complexity of the proposed approaches and demonstrate their applicability on real datasets. In particular, our experiments demonstrate that neighborhood ordering results in more informative features. For the special case of general graphs, i.e. graphs without ordered neighborhoods, the new graph kernels yield efficient and simple algorithms for the comparison of label distributions between graphs.", "bibtex": "@inproceedings{NEURIPS2018_d0fb963f,\n author = {Draief, Moez and Kutzkov, Konstantin and Scaman, Kevin and Vojnovic, Milan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {KONG: Kernels for ordered-neighborhood graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d0fb963ff976f9c37fc81fe03c21ea7b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d0fb963ff976f9c37fc81fe03c21ea7b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d0fb963ff976f9c37fc81fe03c21ea7b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d0fb963ff976f9c37fc81fe03c21ea7b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d0fb963ff976f9c37fc81fe03c21ea7b-Reviews.html", "metareview": "", "pdf_size": 363228, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7783420986460591653&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Huawei Noah\u2019s Ark Lab; London School of Economics, London; Huawei Noah\u2019s Ark Lab; London School of Economics, London", "aff_domain": "huawei.com;gmail.com;huawei.com;lse.ac.uk", "email": "huawei.com;gmail.com;huawei.com;lse.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d0fb963ff976f9c37fc81fe03c21ea7b-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Huawei;London School of Economics", "aff_unique_dep": "Noah\u2019s Ark Lab;", "aff_unique_url": "https://www.huawei.com;https://www.lse.ac.uk", "aff_unique_abbr": "Huawei;LSE", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United Kingdom" }, { "title": "Kalman Normalization: Normalizing Internal Representations Across Network Layers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11030", "id": "11030", "author_site": "Guangrun Wang, jiefeng peng, Ping Luo, Xinjiang Wang, Liang Lin", "author": "Guangrun Wang; jiefeng peng; Ping Luo; Xinjiang Wang; Liang Lin", "abstract": "As an indispensable component, Batch Normalization (BN) has successfully improved the training of deep neural networks (DNNs) with mini-batches, by normalizing the distribution of the internal representation for each hidden layer. However, the effectiveness of BN would diminish with the scenario of micro-batch (e.g. 
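A simplified stand-in for the KONG construction above: each node's ordered neighbor labels form a string, and hashed k-grams of those strings populate an explicit feature map. The paper uses principled sketching with approximation guarantees; this toy version only illustrates the data flow, and all names are hypothetical.

```python
import numpy as np

def ordered_neighborhood_features(adj_lists, labels, dim=1024, k=2):
    # adj_lists[v]: v's neighbors in edge-creation order; labels[v]: a char
    phi = np.zeros(dim)
    for v, nbrs in enumerate(adj_lists):
        s = labels[v] + "".join(labels[u] for u in nbrs)  # ordered string
        for i in range(len(s) - k + 1):
            # NB: Python's hash is salted per process; real code would use
            # a stable hash so feature maps are comparable across runs.
            phi[hash(s[i:i + k]) % dim] += 1.0
    return phi

# Two graphs are then compared with a dot product or cosine of their phis.
```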
less than 4 samples in a mini-batch), since the estimated statistics in a mini-batch are not reliable with insufficient samples. This limits BN's applicability to training larger models for segmentation, detection, and video-related problems, which require small batches constrained by memory consumption. In this paper, we present a novel normalization method, called Kalman Normalization (KN), for improving and accelerating the training of DNNs, particularly under the context of micro-batches. Specifically, unlike the existing solutions treating each hidden layer as an isolated system, KN treats all the layers in a network as a whole system, and estimates the statistics of a certain layer by considering the distributions of all its preceding layers, mimicking the merits of Kalman Filtering. On ResNet50 trained on ImageNet, KN has 3.4% lower error than its BN counterpart when using a batch size of 4; even when using typical batch sizes, KN still maintains an advantage over BN while other BN variants suffer a performance degradation. Moreover, KN can be naturally generalized to many existing normalization variants to obtain gains, e.g. equipping Group Normalization with Group Kalman Normalization (GKN). KN can outperform BN and its variants on large-scale object detection and segmentation tasks on COCO 2017.", "bibtex": "@inproceedings{NEURIPS2018_e369853d,\n author = {Wang, Guangrun and peng, jiefeng and Luo, Ping and Wang, Xinjiang and Lin, Liang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Kalman Normalization: Normalizing Internal Representations Across Network Layers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e369853df766fa44e1ed0ff613f563bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e369853df766fa44e1ed0ff613f563bd-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e369853df766fa44e1ed0ff613f563bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e369853df766fa44e1ed0ff613f563bd-Reviews.html", "metareview": "", "pdf_size": 689050, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2719274991135542287&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Sun Yat-sen University; Sun Yat-sen University; The Chinese University of Hong Kong; SenseTime Group Ltd.+Sun Yat-sen University; Sun Yat-sen University", "aff_domain": "mail2.sysu.edu.cn;gmail.com;gmail.com; ;ieee.org", "email": "mail2.sysu.edu.cn;gmail.com;gmail.com; ;ieee.org", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e369853df766fa44e1ed0ff613f563bd-Abstract.html", "aff_unique_index": "0;0;1;2+0;0", "aff_unique_norm": "Sun Yat-sen University;Chinese University of Hong Kong;SenseTime Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "SYSU;CUHK;SenseTime", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0+0;0", "aff_country_unique": "China" }, { "title": "Knowledge Distillation by On-the-Fly Native Ensemble", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11723", "id": "11723", "author_site": "xu lan, Xiatian
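A toy reading of the Kalman Normalization update described above: the current layer's micro-batch statistics are blended with the estimate propagated from the preceding layer, so unreliable micro-batch estimates get stabilized. The scalar `gain` and the identity state transition are simplifications of the paper's learned quantities.

```python
import numpy as np

def kn_update(mu_prev_hat, var_prev_hat, mu_obs, var_obs, gain=0.5):
    # Blend the (noisy) observed statistics of this layer with the
    # estimate carried over from the preceding layer; `gain` plays the
    # role of the Kalman gain.
    mu_hat = gain * mu_obs + (1.0 - gain) * mu_prev_hat
    var_hat = gain * var_obs + (1.0 - gain) * var_prev_hat
    return mu_hat, var_hat

def kn_normalize(x, mu_hat, var_hat, eps=1e-5):
    # x: (batch, channels) pre-activations of the current layer
    return (x - mu_hat) / np.sqrt(var_hat + eps)
```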
Zhu, Shaogang Gong", "author": "xu lan; Xiatian Zhu; Shaogang Gong", "abstract": "Knowledge distillation is effective for training small and generalisable network models that meet low-memory and fast-execution requirements. Existing offline distillation methods rely on a strong pre-trained teacher, which enables favourable knowledge discovery and transfer but requires a complex two-phase training procedure. Online counterparts address this limitation at the price of lacking a high-capacity teacher. In this work, we present an On-the-fly Native Ensemble (ONE) learning strategy for one-stage online distillation. Specifically, ONE only trains a single multi-branch network while simultaneously establishing a strong teacher on-the-fly to enhance the learning of the target network. Extensive evaluations show that ONE improves the generalisation performance of a variety of deep neural networks more significantly than alternative methods on four image classification datasets: CIFAR10, CIFAR100, SVHN, and ImageNet, whilst retaining computational efficiency advantages.", "bibtex": "@inproceedings{NEURIPS2018_94ef7214,\n author = {lan, xu and Zhu, Xiatian and Gong, Shaogang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Knowledge Distillation by On-the-Fly Native Ensemble},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/94ef7214c4a90790186e255304f8fd1f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/94ef7214c4a90790186e255304f8fd1f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/94ef7214c4a90790186e255304f8fd1f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/94ef7214c4a90790186e255304f8fd1f-Reviews.html", "metareview": "", "pdf_size": 711101, "gs_citation": 604, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11010557558161577661&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/94ef7214c4a90790186e255304f8fd1f-Abstract.html" }, { "title": "L4: Practical loss-based stepsize adaptation for deep learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11622", "id": "11622", "author_site": "Michal Rolinek, Georg Martius", "author": "Michal Rolinek; Georg Martius", "abstract": "We propose a stepsize adaptation scheme for stochastic gradient descent.\nIt operates directly with the loss function and rescales the gradient in order to make fixed predicted progress on the loss.\nWe demonstrate its capabilities by conclusively improving the performance of Adam and Momentum optimizers.\nThe enhanced optimizers with default hyperparameters\n consistently outperform their constant stepsize counterparts, even the best ones,\n without a measurable increase in computational cost.\nThe performance is validated on multiple architectures including dense nets, CNNs, ResNets, and the recurrent Differential Neural Computer on classical datasets MNIST, fashion MNIST, CIFAR10 and others.", "bibtex": "@inproceedings{NEURIPS2018_98b17f06,\n author = {Rolinek, Michal and Martius, Georg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H.
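The L4 scheme just described has a one-line core: rescale the step so that, to first order, it removes a fixed fraction of the gap between the current loss and the best loss seen. A sketch, with `l_min` assumed to be tracked as a decaying running minimum; names and defaults are illustrative.

```python
def l4_stepsize(loss, l_min, grad_dot_update, alpha=0.15, eps=1e-12):
    # eta chosen so a step of eta*v removes a fraction alpha of
    # (L - L_min) to first order, since L(theta - eta*v) ~ L - eta * g.v
    return alpha * (loss - l_min) / (grad_dot_update + eps)

# Usage inside a training loop, with v the update proposed by Adam/momentum:
#   eta = l4_stepsize(loss.item(), l_min, sum((g * v).sum() for g, v in pairs))
#   for p, v_p in zip(params, updates): p.data -= eta * v_p
#   l_min = min(l_min, loss.item())   # the paper uses a decaying minimum
```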
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {L4: Practical loss-based stepsize adaptation for deep learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/98b17f068d5d9b7668e19fb8ae470841-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/98b17f068d5d9b7668e19fb8ae470841-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/98b17f068d5d9b7668e19fb8ae470841-Reviews.html", "metareview": "", "pdf_size": 989231, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12754829728893396358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Max-Planck-Institute for Intelligent Systems T\u00fcbingen, Germany; Max-Planck-Institute for Intelligent Systems T\u00fcbingen, Germany", "aff_domain": "tuebingen.mpg.de;tuebingen.mpg.de", "email": "tuebingen.mpg.de;tuebingen.mpg.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/98b17f068d5d9b7668e19fb8ae470841-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Max-Planck-Institute for Intelligent Systems", "aff_unique_dep": "", "aff_unique_url": "https://www.mpi-is.mpg.de", "aff_unique_abbr": "MPI-IS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "LAG: Lazily Aggregated Gradient for Communication-Efficient Distributed Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11494", "id": "11494", "author_site": "Tianyi Chen, Georgios Giannakis, Tao Sun, Wotao Yin", "author": "Tianyi Chen; Georgios Giannakis; Tao Sun; Wotao Yin", "abstract": "This paper presents a new class of gradient methods for distributed \nmachine learning that adaptively skip the gradient calculations to \nlearn with reduced communication and computation. Simple rules \nare designed to detect slowly-varying gradients and, therefore, \ntrigger the reuse of outdated gradients. The resultant gradient-based \nalgorithms are termed Lazily Aggregated Gradient --- justifying our \nacronym LAG used henceforth. Theoretically, the merits of \nthis contribution are: i) the convergence rate is the same as batch \ngradient descent in strongly-convex, convex, and nonconvex cases; \nand, ii) if the distributed datasets are heterogeneous (quantified by \ncertain measurable constants), the communication rounds needed \nto achieve a targeted accuracy are reduced thanks to the adaptive \nreuse of lagged gradients. Numerical experiments on both \nsynthetic and real data corroborate a significant communication \nreduction compared to alternatives.", "bibtex": "@inproceedings{NEURIPS2018_feecee9f,\n author = {Chen, Tianyi and Giannakis, Georgios and Sun, Tao and Yin, Wotao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
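The LAG trigger above can be sketched as a per-worker test comparing the change in the local gradient against the recent progress of the iterates; a stale gradient is reused whenever the change is small. The constant `c` and the window length are tuning knobs, and the paper's exact condition differs in its weighting.

```python
import numpy as np

def should_upload(grad_new, grad_last_sent, recent_param_diffs, c=0.1):
    """LAG-style communication trigger for one worker (sketch).

    Upload a fresh gradient only if it changed enough relative to how
    fast the iterates have been moving; otherwise the server keeps
    aggregating the previously sent (lagged) gradient.
    """
    change = np.sum((grad_new - grad_last_sent) ** 2)
    progress = sum(np.sum(d ** 2) for d in recent_param_diffs)
    return change > c * progress / max(len(recent_param_diffs), 1)
```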
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {LAG: Lazily Aggregated Gradient for Communication-Efficient Distributed Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/feecee9f1643651799ede2740927317a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/feecee9f1643651799ede2740927317a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/feecee9f1643651799ede2740927317a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/feecee9f1643651799ede2740927317a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/feecee9f1643651799ede2740927317a-Reviews.html", "metareview": "", "pdf_size": 1412666, "gs_citation": 381, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11385199413627464025&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "University of Minnesota - Twin Cities, Minneapolis, MN 55455, USA; University of Minnesota - Twin Cities, Minneapolis, MN 55455, USA; National University of Defense Technology, Changsha, Hunan 410073, China; University of California - Los Angeles, Los Angeles, CA 90095, USA", "aff_domain": "umn.edu;umn.edu;163.com;math.ucla.edu", "email": "umn.edu;umn.edu;163.com;math.ucla.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/feecee9f1643651799ede2740927317a-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Minnesota;National University of Defense Technology;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.minnesota.edu;;https://www.ucla.edu", "aff_unique_abbr": "UMN;;UCLA", "aff_campus_unique_index": "0;0;1;2", "aff_campus_unique": "Twin Cities;Changsha;Los Angeles", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "LF-Net: Learning Local Features from Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11604", "id": "11604", "author_site": "Yuki Ono, Eduard Trulls, Pascal Fua, Kwang Moo Yi", "author": "Yuki Ono; Eduard Trulls; Pascal Fua; Kwang Moo Yi", "abstract": "We present a novel deep architecture and a training strategy to learn a local feature pipeline from scratch, using collections of images without the need for human supervision. To do so we exploit depth and relative camera pose cues to create a virtual target that the network should achieve on one image, provided the outputs of the network for the other image. While this process is inherently non-differentiable, we show that we can optimize the network in a two-branch setup by confining it to one branch, while preserving differentiability in the other. We train our method on both indoor and outdoor datasets, with depth data from 3D sensors for the former, and depth estimates from an off-the-shelf Structure-from-Motion solution for the latter. Our models outperform the state of the art on sparse feature matching on both datasets, while running at 60+ fps for QVGA images.", "bibtex": "@inproceedings{NEURIPS2018_f5496252,\n author = {Ono, Yuki and Trulls, Eduard and Fua, Pascal and Yi, Kwang Moo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {LF-Net: Learning Local Features from Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f5496252609c43eb8a3d147ab9b9c006-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f5496252609c43eb8a3d147ab9b9c006-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f5496252609c43eb8a3d147ab9b9c006-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f5496252609c43eb8a3d147ab9b9c006-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f5496252609c43eb8a3d147ab9b9c006-Reviews.html", "metareview": "", "pdf_size": 23410053, "gs_citation": 689, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8243342192916977654&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Sony Imaging Products & Solutions Inc.; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; Visual Computing Group, University of Victoria", "aff_domain": "sony.com;epfl.ch;epfl.ch;uvic.ca", "email": "sony.com;epfl.ch;epfl.ch;uvic.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f5496252609c43eb8a3d147ab9b9c006-Abstract.html", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Sony;EPFL;University of Victoria", "aff_unique_dep": "Imaging Products & Solutions;;Visual Computing Group", "aff_unique_url": "https://www.sony.net/;https://www.epfl.ch;https://www.uvic.ca", "aff_unique_abbr": "Sony;EPFL;UVic", "aff_campus_unique_index": "1", "aff_campus_unique": ";Victoria", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Japan;Switzerland;Canada" }, { "title": "Large Margin Deep Networks for Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11106", "id": "11106", "author_site": "Gamaleldin Elsayed, Dilip Krishnan, Hossein Mobahi, Kevin Regan, Samy Bengio", "author": "Gamaleldin Elsayed; Dilip Krishnan; Hossein Mobahi; Kevin Regan; Samy Bengio", "abstract": "We present a formulation of deep learning that aims at producing a large margin classifier. The notion of \\emph{margin}, minimum distance to a decision boundary, has served as the foundation of several theoretically profound and empirically successful results for both classification and regression tasks. However, most large margin algorithms are applicable only to shallow models with a preset feature representation, and conventional margin methods for neural networks only enforce margin at the output layer. Such methods are therefore not well suited for deep networks. In this work, we propose a novel loss function to impose a margin on any chosen set of layers of a deep network (including input and hidden layers). Our formulation allows choosing any $l_p$ norm ($p \\geq 1$) on the metric measuring the margin. We demonstrate that the decision boundary obtained by our loss has nice properties compared to standard classification loss functions. Specifically, we show improved empirical results on the MNIST, CIFAR-10 and ImageNet datasets on multiple tasks: generalization from small training sets, corrupted labels, and robustness against adversarial perturbations. The resulting loss is general and complementary to existing data augmentation (such as random/adversarial input transform) and regularization techniques such as weight decay, dropout, and batch norm.
\\footnote{Code for the large margin loss function is released at \\url{https://github.com/google-research/google-research/tree/master/large_margin}}", "bibtex": "@inproceedings{NEURIPS2018_42998cf3,\n author = {Elsayed, Gamaleldin and Krishnan, Dilip and Mobahi, Hossein and Regan, Kevin and Bengio, Samy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Large Margin Deep Networks for Classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/42998cf32d552343bc8e460416382dca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/42998cf32d552343bc8e460416382dca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/42998cf32d552343bc8e460416382dca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/42998cf32d552343bc8e460416382dca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/42998cf32d552343bc8e460416382dca-Reviews.html", "metareview": "", "pdf_size": 1735569, "gs_citation": 357, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4375455714147672635&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Google Research; Google Research; Google Research; Google Research; Google Research", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "https://github.com/google-research/google-research/tree/master/large_margin", "project": "https://ai.google/research/join-us/ai-residency/", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/42998cf32d552343bc8e460416382dca-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google Research", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Large Scale computation of Means and Clusters for Persistence Diagrams using Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11928", "id": "11928", "author_site": "Theo Lacombe, Marco Cuturi, Steve OUDOT", "author": "Theo Lacombe; Marco Cuturi; Steve OUDOT", "abstract": "Persistence diagrams (PDs) are now routinely used to summarize the underlying topology of complex data. Despite several appealing properties, incorporating PDs in learning pipelines can be challenging because their natural geometry is not Hilbertian. Indeed, this was recently exemplified in a string of papers which show that the simple task of averaging a few PDs can be computationally prohibitive. We propose in this article a tractable framework to carry out standard tasks on PDs at scale, notably evaluating distances, estimating barycenters and performing clustering. This framework builds upon a reformulation of PD metrics as optimal transport (OT) problems. Doing so, we can exploit recent computational advances: the OT problem on a planar grid, when regularized with entropy, is convex and can be solved in linear time using the Sinkhorn algorithm and convolutions. This results in scalable computations that can stream on GPUs.
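To make the entropic-OT computation the abstract invokes concrete, here is a minimal NumPy Sinkhorn sketch under our own assumptions: a dense Gibbs kernel and toy 1-D histograms stand in for the paper's convolutional kernel on a planar grid, and the `eps` and iteration count are illustrative choices, not the authors' implementation.

```python
import numpy as np

def sinkhorn_cost(a, b, C, eps=0.1, n_iters=200):
    """Entropy-regularized OT between histograms a and b with ground cost C.

    Generic Sinkhorn sketch: for PDs rasterized on a planar grid, the Gibbs
    kernel K would be applied as a convolution; a dense K suffices here.
    """
    K = np.exp(-C / eps)                   # Gibbs kernel
    u, v = np.ones_like(a), np.ones_like(b)
    for _ in range(n_iters):               # alternating scaling updates
        u = a / (K @ v)
        v = b / (K.T @ u)
    P = u[:, None] * K * v[None, :]        # approximate transport plan
    return float(np.sum(P * C))            # transport cost under the plan

# Toy example: two mass distributions on a 5-point grid.
a = np.array([0.2, 0.3, 0.1, 0.2, 0.2])
b = np.array([0.1, 0.1, 0.4, 0.2, 0.2])
x = np.linspace(0.0, 1.0, 5)
C = (x[:, None] - x[None, :]) ** 2         # squared-distance ground cost
print(sinkhorn_cost(a, b, C))
```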
We demonstrate the efficiency of our approach by carrying out clustering with diagram metrics on several thousand PDs, a scale never seen before in the literature.", "bibtex": "@inproceedings{NEURIPS2018_b58f7d18,\n author = {Lacombe, Theo and Cuturi, Marco and OUDOT, Steve},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Large Scale computation of Means and Clusters for Persistence Diagrams using Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b58f7d184743106a8a66028b7a28937c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b58f7d184743106a8a66028b7a28937c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b58f7d184743106a8a66028b7a28937c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b58f7d184743106a8a66028b7a28937c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b58f7d184743106a8a66028b7a28937c-Reviews.html", "metareview": "", "pdf_size": 1016181, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14983815882251793856&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Datashape+Inria Saclay; Google Brain+CREST, ENSAE; Datashape+Inria Saclay", "aff_domain": "inria.fr;google.com;inria.fr", "email": "inria.fr;google.com;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b58f7d184743106a8a66028b7a28937c-Abstract.html", "aff_unique_index": "0+1;2+3;0+1", "aff_unique_norm": "Datashape;INRIA;Google;CREST", "aff_unique_dep": ";;Google Brain;", "aff_unique_url": ";https://www.inria.fr;https://brain.google.com;https://www.crest.fr", "aff_unique_abbr": ";Inria;Google Brain;CREST", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Saclay;Mountain View", "aff_country_unique_index": "1;2+1;1", "aff_country_unique": ";France;United States" }, { "title": "Large-Scale Stochastic Sampling from the Probability Simplex", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11649", "id": "11649", "author_site": "Jack Baker, Paul Fearnhead, Emily Fox, Christopher Nemeth", "author": "Jack Baker; Paul Fearnhead; Emily B. Fox; Christopher Nemeth", "abstract": "Stochastic gradient Markov chain Monte Carlo (SGMCMC) has become a popular method for scalable Bayesian inference. These methods are based on sampling a discrete-time approximation to a continuous time process, such as the Langevin diffusion. When applied to distributions defined on a constrained space, the time-discretization error can dominate when we are near the boundary of the space. We demonstrate that because of this, current SGMCMC methods for the simplex struggle with sparse simplex spaces, i.e., when many of the components are close to zero. Unfortunately, many popular large-scale Bayesian models, such as network or topic models, require inference on sparse simplex spaces. To avoid the biases caused by this discretization error, we propose the stochastic Cox-Ingersoll-Ross process (SCIR), which removes all discretization error, and we prove that samples from the SCIR process are asymptotically unbiased. We discuss how this idea can be extended to target other constrained spaces.
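The discretization-free simulation that SCIR builds on can be illustrated with the exact transition law of the Cox-Ingersoll-Ross diffusion, which is a scaled noncentral chi-squared; the parameters below and the plain full-data setting are our own illustrative assumptions, not the paper's stochastic-gradient algorithm. Since the CIR stationary law is Gamma, normalizing independent CIR coordinates yields a chain on the probability simplex with a Dirichlet-type target.

```python
import numpy as np
from scipy.stats import ncx2

def cir_exact_step(x, h, a=2.0, b=0.5, sigma=1.0, rng=None):
    """One exact (discretization-free) transition of the CIR process
    dX = a(b - X) dt + sigma * sqrt(X) dW, drawn from its known
    noncentral chi-squared transition law."""
    rng = rng or np.random.default_rng()
    c = sigma**2 * (1 - np.exp(-a * h)) / (4 * a)
    df = 4 * a * b / sigma**2              # degrees of freedom
    nc = x * np.exp(-a * h) / c            # noncentrality
    return c * ncx2.rvs(df, nc, random_state=rng)

# Sketch: run 3 independent CIR chains and normalize to land on the simplex
# (illustrative hyperparameters; the paper drives these updates with
# stochastic gradients of the target's Gamma parameters).
rng = np.random.default_rng(0)
x = np.full(3, 0.5)
for _ in range(1000):
    x = np.array([cir_exact_step(xi, h=0.1, rng=rng) for xi in x])
print(x / x.sum())                          # a point on the probability simplex
```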
Use of the SCIR process within an SGMCMC algorithm is shown to give substantially better performance for a topic model and a Dirichlet process mixture model than existing SGMCMC approaches.", "bibtex": "@inproceedings{NEURIPS2018_900c563b,\n author = {Baker, Jack and Fearnhead, Paul and Fox, Emily and Nemeth, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Large-Scale Stochastic Sampling from the Probability Simplex},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/900c563bfd2c48c16701acca83ad858a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/900c563bfd2c48c16701acca83ad858a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/900c563bfd2c48c16701acca83ad858a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/900c563bfd2c48c16701acca83ad858a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/900c563bfd2c48c16701acca83ad858a-Reviews.html", "metareview": "", "pdf_size": 380167, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9892795582424041794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "STOR-i CDT, Mathematics and Statistics, Lancaster University; Mathematics and Statistics, Lancaster University; Computer Science & Engineering and Statistics, University of Washington; Mathematics and Statistics, Lancaster University", "aff_domain": "lancaster.ac.uk;lancaster.ac.uk;uw.edu;lancaster.ac.uk", "email": "lancaster.ac.uk;lancaster.ac.uk;uw.edu;lancaster.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/900c563bfd2c48c16701acca83ad858a-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Lancaster University;University of Washington", "aff_unique_dep": "Mathematics and Statistics;Computer Science & Engineering and Statistics", "aff_unique_url": "https://www.lancaster.ac.uk;https://www.washington.edu", "aff_unique_abbr": "Lancaster;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Latent Alignment and Variational Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11923", "id": "11923", "author_site": "Yuntian Deng, Yoon Kim, Justin Chiu, Demi Guo, Alexander Rush", "author": "Yuntian Deng; Yoon Kim; Justin Chiu; Demi Guo; Alexander Rush", "abstract": "Neural attention has become central to many state-of-the-art models in natural language processing and related domains. Attention networks are an easy-to-train and effective method for softly simulating alignment; however, the approach does not marginalize over latent alignments in a probabilistic sense. This property makes it difficult to compare attention to other alignment approaches, to compose it with probabilistic models, and to perform posterior inference conditioned on observed data. A related latent approach, hard attention, fixes these issues, but is generally harder to train and less accurate.
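The soft/hard contrast the abstract sets up is easy to state in code: soft attention takes a deterministic expectation under the alignment distribution, while hard attention samples one alignment and needs a score-function (REINFORCE) gradient. This toy NumPy cell is our own illustration of that tradeoff, not the paper's amortized variational attention networks or their variance-reduction estimators.

```python
import numpy as np

rng = np.random.default_rng(0)
scores = rng.normal(size=5)                 # unnormalized alignment scores
values = rng.normal(size=(5, 4))            # memory slots to attend over

p = np.exp(scores - scores.max())
p /= p.sum()                                # attention distribution p(z)

# Soft attention: deterministic expectation E_p[values[z]].
soft_context = p @ values

# Hard attention: sample a single alignment z ~ p(z).
z = rng.choice(5, p=p)
hard_context = values[z]

def f(ctx):
    """Toy downstream objective evaluated on the attended context."""
    return float(ctx.sum())

# Score-function (REINFORCE) gradient w.r.t. the scores:
# grad = f(z) * d log p(z) / d scores, and for a softmax that
# log-probability gradient is (onehot_z - p).
onehot = np.eye(5)[z]
grad_scores = f(hard_context) * (onehot - p)
print(soft_context, hard_context, grad_scores)
```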
This work considers variational attention networks, alternatives to soft and hard attention for learning latent variable alignment models, with tighter approximation bounds based on amortized variational inference. We further propose methods for reducing the variance of gradients to make these approaches computationally feasible. Experiments show that for machine translation and visual question answering, inefficient exact latent variable models outperform standard neural attention, but these gains go away when using hard attention based training. On the other hand, variational attention retains most of the performance gain but with training speed comparable to neural attention.", "bibtex": "@inproceedings{NEURIPS2018_b691334c,\n author = {Deng, Yuntian and Kim, Yoon and Chiu, Justin and Guo, Demi and Rush, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Latent Alignment and Variational Attention},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b691334ccf10d4ab144d672f7783c8a3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b691334ccf10d4ab144d672f7783c8a3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b691334ccf10d4ab144d672f7783c8a3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b691334ccf10d4ab144d672f7783c8a3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b691334ccf10d4ab144d672f7783c8a3-Reviews.html", "metareview": "", "pdf_size": 844264, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6335407498429393003&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "School of Engineering and Applied Sciences, Harvard University; School of Engineering and Applied Sciences, Harvard University; School of Engineering and Applied Sciences, Harvard University; School of Engineering and Applied Sciences, Harvard University; School of Engineering and Applied Sciences, Harvard University", "aff_domain": "seas.harvard.edu;seas.harvard.edu;g.harvard.edu;college.harvard.edu;seas.harvard.edu", "email": "seas.harvard.edu;seas.harvard.edu;g.harvard.edu;college.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b691334ccf10d4ab144d672f7783c8a3-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Sciences", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Latent Gaussian Activity Propagation: Using Smoothness and Structure to Separate and Localize Sounds in Large Noisy Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11348", "id": "11348", "author_site": "Daniel D. 
Johnson, Daniel Gorelik, Ross E Mawhorter, Kyle Suver, Weiqing Gu, Steven Xing, Cody Gabriel, Peter Sankhagowit", "author": "Daniel Johnson; Daniel Gorelik; Ross E Mawhorter; Kyle Suver; Weiqing Gu; Steven Xing; Cody Gabriel; Peter Sankhagowit", "abstract": "We present an approach for simultaneously separating and localizing multiple sound sources using recorded microphone data. Inspired by topic models, our approach is based on a probabilistic model of inter-microphone phase differences, and poses separation and localization as a Bayesian inference problem. We assume sound activity is locally smooth across time, frequency, and location, and use the known position of the microphones to obtain a consistent separation. We compare the performance of our method against existing algorithms on simulated anechoic voice data and find that it obtains high performance across a variety of input conditions.", "bibtex": "@inproceedings{NEURIPS2018_7dd0240c,\n author = {Johnson, Daniel and Gorelik, Daniel and Mawhorter, Ross E and Suver, Kyle and Gu, Weiqing and Xing, Steven and Gabriel, Cody and Sankhagowit, Peter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Latent Gaussian Activity Propagation: Using Smoothness and Structure to Separate and Localize Sounds in Large Noisy Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7dd0240cd412efde8bc165e864d3644f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7dd0240cd412efde8bc165e864d3644f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7dd0240cd412efde8bc165e864d3644f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7dd0240cd412efde8bc165e864d3644f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7dd0240cd412efde8bc165e864d3644f-Reviews.html", "metareview": "", "pdf_size": 1532107, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3681191126685212871&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Mathematics, Harvey Mudd College; Department of Mathematics, Harvey Mudd College; Department of Mathematics, Harvey Mudd College; Department of Mathematics, Harvey Mudd College; Department of Mathematics, Harvey Mudd College; Intel Corporation; Intel Corporation; Intel Corporation", "aff_domain": "hmc.edu;hmc.edu;hmc.edu;hmc.edu;hmc.edu;intel.com;intel.com;intel.com", "email": "hmc.edu;hmc.edu;hmc.edu;hmc.edu;hmc.edu;intel.com;intel.com;intel.com", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7dd0240cd412efde8bc165e864d3644f-Abstract.html", "aff_unique_index": "0;0;0;0;0;1;1;1", "aff_unique_norm": "Harvey Mudd College;Intel", "aff_unique_dep": "Department of Mathematics;Intel Corporation", "aff_unique_url": "https://www.hmc.edu;https://www.intel.com", "aff_unique_abbr": "HMC;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11762", "id": "11762", "author_site": "Tianyu He, Xu Tan, Yingce Xia, Di
He, Tao Qin, Zhibo Chen, Tie-Yan Liu", "author": "Tianyu He; Xu Tan; Yingce Xia; Di He; Tao Qin; Zhibo Chen; Tie-Yan Liu", "abstract": "Neural Machine Translation (NMT) has achieved remarkable progress with the rapid evolution of model structures. In this paper, we propose the concept of layer-wise coordination for NMT, which explicitly coordinates the learning of hidden representations of the encoder and decoder together layer by layer, gradually from low level to high level. Specifically, we design a layer-wise attention and mixed attention mechanism, and further share the parameters of each layer between the encoder and decoder to regularize and coordinate the learning. Experiments show that combined with the state-of-the-art Transformer model, layer-wise coordination achieves improvements on three IWSLT and two WMT translation tasks. More specifically, our method achieves BLEU scores of 34.43 and 29.01 on the WMT16 English-Romanian and WMT14 English-German tasks, outperforming the Transformer baseline.", "bibtex": "@inproceedings{NEURIPS2018_4fb8a7a2,\n author = {He, Tianyu and Tan, Xu and Xia, Yingce and He, Di and Qin, Tao and Chen, Zhibo and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Reviews.html", "metareview": "", "pdf_size": 497555, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14258883426797488339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "CAS Key Laboratory of Technology in Geo-spatial Information Processing and Application System, University of Science and Technology of China; Microsoft Research; Microsoft Research; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University; Microsoft Research; CAS Key Laboratory of Technology in Geo-spatial Information Processing and Application System, University of Science and Technology of China; Microsoft Research", "aff_domain": "mail.ustc.edu.cn;microsoft.com;microsoft.com;pku.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com", "email": "mail.ustc.edu.cn;microsoft.com;microsoft.com;pku.edu.cn;microsoft.com;ustc.edu.cn;microsoft.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4fb8a7a22a82c80f2c26fe6c1e0dcbb3-Abstract.html", "aff_unique_index": "0;1;1;2;1;0;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft;Peking University", "aff_unique_dep": "Key Laboratory of Technology in Geo-spatial Information Processing and Application System;Microsoft Research;School of EECS", "aff_unique_url": "http://www.ustc.edu.cn/;https://www.microsoft.com/en-us/research;http://www.pku.edu.cn", "aff_unique_abbr": "USTC;MSR;PKU",
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Learn What Not to Learn: Action Elimination with Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11357", "id": "11357", "author_site": "Tom Zahavy, Matan Haroush, Nadav Merlis, Daniel J Mankowitz, Shie Mannor", "author": "Tom Zahavy; Matan Haroush; Nadav Merlis; Daniel J Mankowitz; Shie Mannor", "abstract": "Learning how to act when there are many available actions in each state is a challenging task for Reinforcement Learning (RL) agents, especially when many of the actions are redundant or irrelevant. In such cases, it is easier to learn which actions not to take. In this work, we propose the Action-Elimination Deep Q-Network (AE-DQN) architecture that combines a Deep RL algorithm with an Action Elimination Network (AEN) that eliminates sub-optimal actions. The AEN is trained to predict invalid actions, supervised by an external elimination signal provided by the environment. Simulations demonstrate a considerable speedup and added robustness over vanilla DQN in text-based games with over a thousand discrete actions.", "bibtex": "@inproceedings{NEURIPS2018_645098b0,\n author = {Zahavy, Tom and Haroush, Matan and Merlis, Nadav and Mankowitz, Daniel J and Mannor, Shie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learn What Not to Learn: Action Elimination with Deep Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/645098b086d2f9e1e0e939c27f9f2d6f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/645098b086d2f9e1e0e939c27f9f2d6f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/645098b086d2f9e1e0e939c27f9f2d6f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/645098b086d2f9e1e0e939c27f9f2d6f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/645098b086d2f9e1e0e939c27f9f2d6f-Reviews.html", "metareview": "", "pdf_size": 1618825, "gs_citation": 259, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11277524063957057026&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "The Technion - Israel Institute of Technology; The Technion - Israel Institute of Technology; The Technion - Israel Institute of Technology; Deepmind; The Technion - Israel Institute of Technology", "aff_domain": "campus.technion.ac.il;campus.technion.ac.il;campus.technion.ac.il; ; ", "email": "campus.technion.ac.il;campus.technion.ac.il;campus.technion.ac.il; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/645098b086d2f9e1e0e939c27f9f2d6f-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Israel Institute of Technology;DeepMind", "aff_unique_dep": ";", "aff_unique_url": "https://www.technion.ac.il/en/;https://deepmind.com", "aff_unique_abbr": "Technion;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Israel;United Kingdom" }, { "title": "Learning Abstract Options", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11986", 
"id": "11986", "author_site": "Matthew Riemer, Miao Liu, Gerald Tesauro", "author": "Matthew Riemer; Miao Liu; Gerald Tesauro", "abstract": "Part of", "bibtex": "@inproceedings{NEURIPS2018_cdf28f8b,\n author = {Riemer, Matthew and Liu, Miao and Tesauro, Gerald},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Abstract Options},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cdf28f8b7d14ab02d12a2329d71e4079-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cdf28f8b7d14ab02d12a2329d71e4079-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cdf28f8b7d14ab02d12a2329d71e4079-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cdf28f8b7d14ab02d12a2329d71e4079-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cdf28f8b7d14ab02d12a2329d71e4079-Reviews.html", "metareview": "", "pdf_size": 3600255, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11011398819596592324&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "IBM Research; IBM Research; IBM Research", "aff_domain": "us.ibm.com;us.ibm.com;us.ibm.com", "email": "us.ibm.com;us.ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cdf28f8b7d14ab02d12a2329d71e4079-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Attentional Communication for Multi-Agent Cooperation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11699", "id": "11699", "author_site": "Jiechuan Jiang, Zongqing Lu", "author": "Jiechuan Jiang; Zongqing Lu", "abstract": "Communication could potentially be an effective way for multi-agent cooperation. However, information sharing among all agents or in predefined communication architectures that existing methods adopt can be problematic. When there is a large number of agents, agents cannot differentiate valuable information that helps cooperative decision making from globally shared information. Therefore, communication barely helps, and could even impair the learning of multi-agent cooperation. Predefined communication architectures, on the other hand, restrict communication among agents and thus restrain potential cooperation. To tackle these difficulties, in this paper, we propose an attentional communication model that learns when communication is needed and how to integrate shared information for cooperative decision making. Our model leads to efficient and effective communication for large-scale multi-agent cooperation. Empirically, we show the strength of our model in a variety of cooperative scenarios, where agents are able to develop more coordinated and sophisticated strategies than existing methods.", "bibtex": "@inproceedings{NEURIPS2018_6a8018b3,\n author = {Jiang, Jiechuan and Lu, Zongqing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Attentional Communication for Multi-Agent Cooperation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a8018b3a00b69c008601b8becae392b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a8018b3a00b69c008601b8becae392b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6a8018b3a00b69c008601b8becae392b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a8018b3a00b69c008601b8becae392b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a8018b3a00b69c008601b8becae392b-Reviews.html", "metareview": "", "pdf_size": 993699, "gs_citation": 651, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7833785015105618077&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Peking University; Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "email": "pku.edu.cn;pku.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a8018b3a00b69c008601b8becae392b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Learning Attractor Dynamics for Generative Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11893", "id": "11893", "author_site": "Yan Wu, Gregory Wayne, Karol Gregor, Timothy Lillicrap", "author": "Yan Wu; Gregory Wayne; Karol Gregor; Timothy Lillicrap", "abstract": "A central challenge faced by memory systems is the robust retrieval of a stored pattern in the presence of interference due to other stored patterns and noise. A theoretically well-founded solution to robust retrieval is given by attractor dynamics, which iteratively cleans up patterns during recall. However, incorporating attractor dynamics into modern deep learning systems poses difficulties: attractor basins are characterised by vanishing gradients, which are known to make training neural networks difficult. In this work, we exploit recent advances in variational inference and avoid the vanishing gradient problem by training a generative distributed memory with a variational lower-bound-based Lyapunov function. The model is minimalistic with surprisingly few parameters. Experiments show that it converges to correct patterns upon iterative retrieval and achieves competitive performance as both a memory model and a generative model.", "bibtex": "@inproceedings{NEURIPS2018_6e4243f5,\n author = {Wu, Yan and Wayne, Gregory and Gregor, Karol and Lillicrap, Timothy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Attractor Dynamics for Generative Memory},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6e4243f5511fd6ef0f03e9f386d54403-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6e4243f5511fd6ef0f03e9f386d54403-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6e4243f5511fd6ef0f03e9f386d54403-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6e4243f5511fd6ef0f03e9f386d54403-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6e4243f5511fd6ef0f03e9f386d54403-Reviews.html", "metareview": "", "pdf_size": 2157870, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9940290258944118765&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6e4243f5511fd6ef0f03e9f386d54403-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Beam Search Policies via Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12007", "id": "12007", "author_site": "Renato Negrinho, Matthew Gormley, Geoffrey Gordon", "author": "Renato Negrinho; Matthew Gormley; Geoffrey J. Gordon", "abstract": "Beam search is widely used for approximate decoding in structured prediction problems. Models often use a beam at test time but ignore its existence at train time, and therefore do not explicitly learn how to use the beam. We develop a unifying meta-algorithm for learning beam search policies using imitation learning. In our setting, the beam is part of the model and not just an artifact of approximate decoding. Our meta-algorithm captures existing learning algorithms and suggests new ones. It also lets us show novel no-regret guarantees for learning beam search policies.", "bibtex": "@inproceedings{NEURIPS2018_967c2ae0,\n author = {Negrinho, Renato and Gormley, Matthew and Gordon, Geoffrey J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Beam Search Policies via Imitation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/967c2ae04b169f07e7fa8fdfd110551e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/967c2ae04b169f07e7fa8fdfd110551e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/967c2ae04b169f07e7fa8fdfd110551e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/967c2ae04b169f07e7fa8fdfd110551e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/967c2ae04b169f07e7fa8fdfd110551e-Reviews.html", "metareview": "", "pdf_size": 285603, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12690180817124727252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Machine Learning Department, Carnegie Mellon University; Machine Learning Department, Carnegie Mellon University; Machine Learning Department, Carnegie Mellon University + Microsoft Research", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/967c2ae04b169f07e7fa8fdfd110551e-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": "Machine Learning Department;Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Learning Bounds for Greedy Approximation with Explicit Feature Maps from Multiple Kernels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11461", "id": "11461", "author_site": "Shahin Shahrampour, Vahid Tarokh", "author": "Shahin Shahrampour; Vahid Tarokh", "abstract": "Nonlinear kernels can be approximated using finite-dimensional feature maps for efficient risk minimization. Due to the inherent trade-off between the dimension of the (mapped) feature space and the approximation accuracy, the key problem is to identify promising (explicit) features leading to a satisfactory out-of-sample performance. In this work, we tackle this problem by efficiently choosing such features from multiple kernels in a greedy fashion. Our method sequentially selects these explicit features from a set of candidate features using a correlation metric. We establish an out-of-sample error bound capturing the trade-off between the error in terms of explicit features (approximation error) and the error due to spectral properties of the best model in the Hilbert space associated with the combined kernel (spectral error). The result verifies that when the (best) underlying data model is sparse enough, i.e., the spectral error is negligible, one can control the test error with a small number of explicit features that can scale poly-logarithmically with data.
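A matching-pursuit-style reading of the procedure: draw candidate random Fourier features from several kernels, then greedily keep the candidate most correlated with the current residual. The bandwidth grid, candidate count, and plain least-squares refit below are our own illustrative assumptions; the paper's precise correlation metric and error bounds are given in the text.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d = 200, 3
X = rng.normal(size=(n, d))
y = np.sin(X[:, 0]) + 0.1 * rng.normal(size=n)

# Candidate random Fourier features drawn from multiple Gaussian kernels
# (one bandwidth per kernel); each candidate maps x -> cos(w.x + b).
bandwidths = [0.5, 1.0, 2.0]
W = np.vstack([rng.normal(scale=1.0 / s, size=(100, d)) for s in bandwidths])
b = rng.uniform(0, 2 * np.pi, size=W.shape[0])
Phi = np.cos(X @ W.T + b)                   # n x (number of candidates)

selected, residual = [], y - y.mean()
for _ in range(20):                         # keep 20 explicit features
    corr = np.abs(Phi.T @ residual)         # correlation with the residual
    corr[selected] = -np.inf                # never reselect a feature
    selected.append(int(np.argmax(corr)))
    # Refit least squares on the selected features, then update the residual.
    S = Phi[:, selected]
    coef, *_ = np.linalg.lstsq(S, y, rcond=None)
    residual = y - S @ coef
print("train RMSE:", np.sqrt(np.mean(residual ** 2)))
```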
Our empirical results show that given a fixed number of explicit features, the method can achieve a lower test error with a smaller time cost, compared to the state-of-the-art in data-dependent random features.", "bibtex": "@inproceedings{NEURIPS2018_80537a94,\n author = {Shahrampour, Shahin and Tarokh, Vahid},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Bounds for Greedy Approximation with Explicit Feature Maps from Multiple Kernels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/80537a945c7aaa788ccfcdf1b99b5d8f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/80537a945c7aaa788ccfcdf1b99b5d8f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/80537a945c7aaa788ccfcdf1b99b5d8f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/80537a945c7aaa788ccfcdf1b99b5d8f-Reviews.html", "metareview": "", "pdf_size": 545516, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11018844754262284410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Industrial and Systems Engineering, Texas A&M University; Department of Electrical and Computer Engineering, Duke University", "aff_domain": "tamu.edu;duke.edu", "email": "tamu.edu;duke.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/80537a945c7aaa788ccfcdf1b99b5d8f-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;Duke University", "aff_unique_dep": "Department of Industrial and Systems Engineering;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.tamu.edu;https://www.duke.edu", "aff_unique_abbr": "TAMU;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Compressed Transforms with Low Displacement Rank", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11863", "id": "11863", "author_site": "Anna Thomas, Albert Gu, Tri Dao, Atri Rudra, Christopher R\u00e9", "author": "Anna Thomas; Albert Gu; Tri Dao; Atri Rudra; Christopher R\u00e9", "abstract": "The low displacement rank (LDR) framework for structured matrices represents a matrix through two displacement operators and a low-rank residual. Existing use of LDR matrices in deep learning has applied fixed displacement operators encoding forms of shift invariance akin to convolutions. We introduce a rich class of LDR matrices with more general displacement operators, and explicitly learn over both the operators and the low-rank component. This class generalizes several previous constructions while preserving compression and efficient computation. We prove bounds on the VC dimension of multi-layer neural networks with structured weight matrices and show empirically that our compact parameterization can reduce the sample complexity of learning. 
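For intuition about the fixed-operator special case that the abstract says prior LDR work used (shift-invariant structure akin to convolutions), here is a circulant weight layer applied in O(n log n) via the FFT; learning the displacement operators themselves, which is the paper's actual contribution, is not attempted in this sketch.

```python
import numpy as np

def circulant_matvec(c, x):
    """Multiply by the circulant matrix with first column c in O(n log n),
    using the FFT diagonalization of circulant matrices (circular
    convolution theorem)."""
    return np.real(np.fft.ifft(np.fft.fft(c) * np.fft.fft(x)))

# Sanity check against the dense circulant matrix on a toy vector.
rng = np.random.default_rng(0)
n = 8
c = rng.normal(size=n)
x = rng.normal(size=n)
C = np.column_stack([np.roll(c, k) for k in range(n)])  # dense circulant
assert np.allclose(C @ x, circulant_matvec(c, x))
print(circulant_matvec(c, x))
```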
When replacing weight layers in fully-connected, convolutional, and recurrent neural networks for image classification and language modeling tasks, our new classes exceed the accuracy of existing compression approaches, and on some tasks even outperform general unstructured layers while using more than 20x fewer parameters.", "bibtex": "@inproceedings{NEURIPS2018_8e621619,\n author = {Thomas, Anna and Gu, Albert and Dao, Tri and Rudra, Atri and R\\'{e}, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Compressed Transforms with Low Displacement Rank},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8e621619d71d0ae5ef4e631ad586334f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8e621619d71d0ae5ef4e631ad586334f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8e621619d71d0ae5ef4e631ad586334f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8e621619d71d0ae5ef4e631ad586334f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8e621619d71d0ae5ef4e631ad586334f-Reviews.html", "metareview": "", "pdf_size": 2425289, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8419515952370992696&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, Stanford University; Department of Computer Science, Stanford University; Department of Computer Science, Stanford University; Department of Computer Science and Engineering, University at Buffalo, SUNY; Department of Computer Science, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;buffalo.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;buffalo.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8e621619d71d0ae5ef4e631ad586334f-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Stanford University;University at Buffalo", "aff_unique_dep": "Department of Computer Science;Department of Computer Science and Engineering", "aff_unique_url": "https://www.stanford.edu;https://www.buffalo.edu", "aff_unique_abbr": "Stanford;UB", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Stanford;Buffalo", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Concave Conditional Likelihood Models for Improved Analysis of Tandem Mass Spectra", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11529", "id": "11529", "author_site": "John Halloran, David M Rocke", "author": "John T Halloran; David M Rocke", "abstract": "The most widely used technology to identify the proteins present in a complex biological sample is tandem mass spectrometry, which quickly produces a large collection of spectra representative of the peptides (i.e., protein subsequences) present in the original sample. In this work, we greatly expand the parameter learning capabilities of a dynamic Bayesian network (DBN) peptide-scoring algorithm, Didea, by deriving emission distributions for which its conditional log-likelihood scoring function remains concave. 
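The concavity property at stake can be spot-checked numerically on the simplest member of the log-sum-exp family: a log-linear conditional log-likelihood (a linear term minus a log-sum-exp) is concave in its parameters. This midpoint check is our own illustration of that property and says nothing about Didea's actual DBN emission model.

```python
import numpy as np

rng = np.random.default_rng(0)
Phi = rng.normal(size=(10, 4))              # feature vectors for 10 labels
y = 3                                       # observed label

def cond_loglik(theta):
    """Log-linear conditional log-likelihood: linear term minus a
    (stabilized) log-sum-exp, hence concave in theta."""
    s = Phi @ theta
    return s[y] - (np.log(np.sum(np.exp(s - s.max()))) + s.max())

# Midpoint-concavity spot check along random parameter pairs:
# f((t1 + t2) / 2) >= (f(t1) + f(t2)) / 2 for concave f.
for _ in range(5):
    t1, t2 = rng.normal(size=4), rng.normal(size=4)
    mid = cond_loglik((t1 + t2) / 2)
    assert mid >= (cond_loglik(t1) + cond_loglik(t2)) / 2 - 1e-12
print("midpoint concavity holds on sampled pairs")
```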
We show that this class of emission distributions, called Convex Virtual Emissions (CVEs), naturally generalizes the log-sum-exp function while rendering both maximum likelihood estimation and conditional maximum likelihood estimation concave for a wide range of Bayesian networks. Utilizing CVEs in Didea allows efficient learning of a large number of parameters while ensuring global convergence, in stark contrast to Didea\u2019s previous parameter learning framework (which could only learn a single parameter using a costly grid search) and other trainable models (which only ensure convergence to local optima). The newly trained scoring function substantially outperforms the state-of-the-art in both scoring function accuracy and downstream Fisher kernel analysis. Furthermore, we significantly improve Didea\u2019s runtime performance through successive optimizations to its message passing schedule and derive explicit connections between Didea\u2019s new concave score and related MS/MS scoring functions.", "bibtex": "@inproceedings{NEURIPS2018_4ebccfb3,\n author = {Halloran, John T and Rocke, David M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Concave Conditional Likelihood Models for Improved Analysis of Tandem Mass Spectra},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4ebccfb3e317c7789f04f7a558df4537-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4ebccfb3e317c7789f04f7a558df4537-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4ebccfb3e317c7789f04f7a558df4537-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4ebccfb3e317c7789f04f7a558df4537-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4ebccfb3e317c7789f04f7a558df4537-Reviews.html", "metareview": "", "pdf_size": 1080469, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14619016553761026660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Public Health Sciences, University of California, Davis; Department of Public Health Sciences, University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu", "email": "ucdavis.edu;ucdavis.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4ebccfb3e317c7789f04f7a558df4537-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "Department of Public Health Sciences", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Conditioned Graph Structures for Interpretable Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11797", "id": "11797", "author_site": "Will Norcliffe-Brown, Stathis Vafeias, Sarah Parisot", "author": "Will Norcliffe-Brown; Stathis Vafeias; Sarah Parisot", "abstract": "Visual Question answering is a challenging problem requiring a combination of concepts from Computer Vision and Natural Language Processing. 
Most existing approaches use a two-stream strategy, computing image and question features that are subsequently merged using a variety of techniques. Nonetheless, very few rely on higher level image representations, which can capture semantic and spatial relationships. In this paper, we propose a novel graph-based approach for Visual Question Answering. Our method combines a graph learner module, which learns a question specific graph representation of the input image, with the recent concept of graph convolutions, aiming to learn image representations that capture question specific interactions. We test our approach on the VQA v2 dataset using a simple baseline architecture enhanced by the proposed graph learner module. We obtain promising results with 66.18% accuracy and demonstrate the interpretability of the proposed method. Code can be found at github.com/aimbrain/vqa-project.", "bibtex": "@inproceedings{NEURIPS2018_4aeae10e,\n author = {Norcliffe-Brown, Will and Vafeias, Stathis and Parisot, Sarah},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Conditioned Graph Structures for Interpretable Visual Question Answering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4aeae10ea1c6433c926cdfa558d31134-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4aeae10ea1c6433c926cdfa558d31134-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4aeae10ea1c6433c926cdfa558d31134-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4aeae10ea1c6433c926cdfa558d31134-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4aeae10ea1c6433c926cdfa558d31134-Reviews.html", "metareview": "", "pdf_size": 4044533, "gs_citation": 311, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16899155560172978534&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "AimBrain Ltd.; AimBrain Ltd.; AimBrain Ltd.", "aff_domain": "aimbrain.com;aimbrain.com;aimbrain.com", "email": "aimbrain.com;aimbrain.com;aimbrain.com", "github": "github.com/aimbrain/vqa-project", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4aeae10ea1c6433c926cdfa558d31134-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "AimBrain", "aff_unique_dep": "", "aff_unique_url": "https://www.aimbrain.com", "aff_unique_abbr": "AimBrain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning Confidence Sets using Support Vector Machines", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11483", "id": "11483", "author_site": "Wenbo Wang, Xingye Qiao", "author": "Wenbo Wang; Xingye Qiao", "abstract": "The goal of confidence-set learning in the binary classification setting is to construct two sets, each with a specific probability guarantee to cover a class. An observation outside the overlap of the two sets is deemed to be from one of the two classes, while the overlap is an ambiguity region which could belong to either class. Instead of plug-in approaches, we propose a support vector classifier to construct confidence sets in a flexible manner.
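To make the two-set objects concrete, here is a plug-in-style sketch that thresholds estimated class probabilities so each set covers its own class at rate roughly 1 - alpha, with the overlap forming the ambiguity region. Note the paper's point is precisely to replace this plug-in step with a support vector formulation, so treat this as a baseline illustration only; the classifier, alpha, and synthetic data are our assumptions.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(-1, 1, (200, 2)), rng.normal(1, 1, (200, 2))])
y = np.repeat([0, 1], 200)

clf = LogisticRegression().fit(X, y)
p1 = clf.predict_proba(X)[:, 1]             # estimated P(class 1 | x)

alpha = 0.05                                # target non-coverage per class
# Thresholds chosen so each set covers its class at rate about 1 - alpha.
t_hi = np.quantile(p1[y == 1], alpha)       # x in C1 iff p1 >= t_hi
t_lo = np.quantile(p1[y == 0], 1 - alpha)   # x in C0 iff p1 <= t_lo

in_C1, in_C0 = p1 >= t_hi, p1 <= t_lo
ambiguity = in_C0 & in_C1                   # overlap: could be either class
print("class-1 coverage:", np.mean(in_C1[y == 1]))
print("class-0 coverage:", np.mean(in_C0[y == 0]))
print("ambiguity fraction:", np.mean(ambiguity))
```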
Theoretically, we show that the proposed learner can control the non-coverage rates and minimize the ambiguity with high probability. Efficient algorithms are developed and numerical studies illustrate the effectiveness of the proposed method.", "bibtex": "@inproceedings{NEURIPS2018_8b422406,\n author = {Wang, Wenbo and Qiao, Xingye},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Confidence Sets using Support Vector Machines},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8b4224068a41c5d37f5e2d54f3995089-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8b4224068a41c5d37f5e2d54f3995089-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8b4224068a41c5d37f5e2d54f3995089-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8b4224068a41c5d37f5e2d54f3995089-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8b4224068a41c5d37f5e2d54f3995089-Reviews.html", "metareview": "", "pdf_size": 1784201, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2767399351379802650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Mathematical Sciences, Binghamton University; Department of Mathematical Sciences, Binghamton University", "aff_domain": "math.binghamton.edu;math.binghamton.edu", "email": "math.binghamton.edu;math.binghamton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8b4224068a41c5d37f5e2d54f3995089-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Binghamton University", "aff_unique_dep": "Department of Mathematical Sciences", "aff_unique_url": "https://www.binghamton.edu", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Deep Disentangled Embeddings With the F-Statistic Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11045", "id": "11045", "author_site": "Karl Ridgeway, Michael Mozer", "author": "Karl Ridgeway; Michael Mozer", "abstract": "Deep-embedding methods aim to discover representations of a domain that make explicit the domain's class structure and thereby support few-shot learning. Disentangling methods aim to make explicit compositional or factorial structure. We combine these two active but independent lines of research and propose a new paradigm suitable for both goals. We propose and evaluate a novel loss function based on the $F$ statistic, which describes the separation of two or more distributions. By ensuring that distinct classes are well separated on a subset of embedding dimensions, we obtain embeddings that are useful for few-shot learning. By not requiring separation on all dimensions, we encourage the discovery of disentangled representations. Our embedding method matches or beats state-of-the-art, as evaluated by performance on recall@$k$ and few-shot learning tasks. Our method also obtains performance superior to a variety of alternatives on disentangling, as evaluated by two key properties of a disentangled representation: modularity and explicitness. 
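The separation statistic underlying the F-statistic loss is the classical one-way ANOVA F ratio of between-class to within-class variance, computed here per embedding dimension; how the paper turns this ratio into a trainable loss over subsets of dimensions is described in the text, so treat this as the building block only.

```python
import numpy as np

def f_statistic(groups):
    """One-way ANOVA F statistic per embedding dimension:
    between-group variance over within-group variance."""
    k = len(groups)
    n = sum(len(g) for g in groups)
    grand = np.mean(np.vstack(groups), axis=0)
    between = sum(len(g) * (g.mean(0) - grand) ** 2 for g in groups) / (k - 1)
    within = sum(((g - g.mean(0)) ** 2).sum(0) for g in groups) / (n - k)
    return between / within

rng = np.random.default_rng(0)
# Two classes separated on dimension 0 only: F should be large on dim 0
# and near 1 on dim 1, matching the idea of separation on a subset of dims.
a = rng.normal([0.0, 0.0], 0.5, (100, 2))
b = rng.normal([3.0, 0.0], 0.5, (100, 2))
print(f_statistic([a, b]))
```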
The goal of our work is to obtain more interpretable, manipulable, and generalizable deep representations of concepts and categories.", "bibtex": "@inproceedings{NEURIPS2018_2b24d495,\n author = {Ridgeway, Karl and Mozer, Michael C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Deep Disentangled Embeddings With the F-Statistic Loss},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2b24d495052a8ce66358eb576b8912c8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2b24d495052a8ce66358eb576b8912c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2b24d495052a8ce66358eb576b8912c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2b24d495052a8ce66358eb576b8912c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2b24d495052a8ce66358eb576b8912c8-Reviews.html", "metareview": "", "pdf_size": 338237, "gs_citation": 257, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6356721257291693745&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, University of Colorado + Sensory, Inc.; Department of Computer Science, University of Colorado", "aff_domain": "colorado.edu;colorado.edu", "email": "colorado.edu;colorado.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2b24d495052a8ce66358eb576b8912c8-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "University of Colorado;Sensory, Inc.", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.colorado.edu;https://www.sensoryinc.com", "aff_unique_abbr": "CU;Sensory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "Learning Disentangled Joint Continuous and Discrete Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11093", "id": "11093", "author": "Emilien Dupont", "abstract": "We present a framework for learning disentangled and interpretable jointly continuous and discrete representations in an unsupervised manner. By augmenting the continuous latent distribution of variational autoencoders with a relaxed discrete distribution and controlling the amount of information encoded in each latent unit, we show how continuous and categorical factors of variation can be discovered automatically from data. Experiments show that the framework disentangles continuous and discrete generative factors on various datasets and outperforms current disentangling methods when a discrete generative factor is prominent.", "bibtex": "@inproceedings{NEURIPS2018_b9228e09,\n author = {Dupont, Emilien},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Disentangled Joint Continuous and Discrete Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b9228e0962a78b84f3d5d92f4faa000b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b9228e0962a78b84f3d5d92f4faa000b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b9228e0962a78b84f3d5d92f4faa000b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b9228e0962a78b84f3d5d92f4faa000b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b9228e0962a78b84f3d5d92f4faa000b-Reviews.html", "metareview": "", "pdf_size": 2252103, "gs_citation": 287, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14996308996785863098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Schlumberger Software Technology Innovation Center, Menlo Park, CA, USA", "aff_domain": "slb.com", "email": "slb.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b9228e0962a78b84f3d5d92f4faa000b-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Schlumberger", "aff_unique_dep": "Software Technology Innovation Center", "aff_unique_url": "https://www.slb.com", "aff_unique_abbr": "", "aff_campus_unique_index": "0", "aff_campus_unique": "Menlo Park", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Learning Gaussian Processes by Minimizing PAC-Bayesian Generalization Bounds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11336", "id": "11336", "author_site": "David Reeb, Andreas Doerr, Sebastian Gerwinn, Barbara Rakitsch", "author": "David Reeb; Andreas Doerr; Sebastian Gerwinn; Barbara Rakitsch", "abstract": "Gaussian Processes (GPs) are a generic modelling tool for supervised learning. While they have been successfully applied on large datasets, their use in safety-critical applications is hindered by the lack of good performance guarantees. To this end, we propose a method to learn GPs and their sparse approximations by directly optimizing a PAC-Bayesian bound on their generalization performance, instead of maximizing the marginal likelihood. Besides its theoretical appeal, we find in our evaluation that our learning method is robust and yields significantly better generalization guarantees than other common GP approaches on several regression benchmark datasets.", "bibtex": "@inproceedings{NEURIPS2018_d43ab110,\n author = {Reeb, David and Doerr, Andreas and Gerwinn, Sebastian and Rakitsch, Barbara},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Gaussian Processes by Minimizing PAC-Bayesian Generalization Bounds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d43ab110ab2489d6b9b2caa394bf920f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d43ab110ab2489d6b9b2caa394bf920f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d43ab110ab2489d6b9b2caa394bf920f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d43ab110ab2489d6b9b2caa394bf920f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d43ab110ab2489d6b9b2caa394bf920f-Reviews.html", "metareview": "", "pdf_size": 1188215, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10486427122061554310&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "https://www.bosch-ai.com", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d43ab110ab2489d6b9b2caa394bf920f-Abstract.html" }, { "title": "Learning Hierarchical Semantic Image Manipulation through Structured Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11278", "id": "11278", "author_site": "Seunghoon Hong, Xinchen Yan, Thomas Huang, Honglak Lee", "author": "Seunghoon Hong; Xinchen Yan; Thomas S. Huang; Honglak Lee", "abstract": "Understanding, reasoning, and manipulating semantic concepts of images have been a fundamental research problem for decades. Previous work mainly focused on direct manipulation of the natural image manifold through color strokes, key-points, textures, and holes-to-fill. In this work, we present a novel hierarchical framework for semantic image manipulation. Key to our hierarchical framework is that we employ structured semantic layout as our intermediate representation for manipulation. Initialized with coarse-level bounding boxes, our layout generator first creates a pixel-wise semantic layout capturing the object shape, object-object interactions, and object-scene relations. Then our image generator fills in the pixel-level textures guided by the semantic layout. Such a framework allows a user to manipulate images at the object level by adding, removing, and moving one bounding box at a time. Experimental evaluations demonstrate the advantages of the hierarchical manipulation framework over existing image generation and context hole-filling models, both qualitatively and quantitatively. Benefits of the hierarchical framework are further demonstrated in applications such as semantic object manipulation, interactive image editing, and data-driven image manipulation.", "bibtex": "@inproceedings{NEURIPS2018_602d1305,\n author = {Hong, Seunghoon and Yan, Xinchen and Huang, Thomas S and Lee, Honglak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Hierarchical Semantic Image Manipulation through Structured Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/602d1305678a8d5fdb372271e980da6a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/602d1305678a8d5fdb372271e980da6a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/602d1305678a8d5fdb372271e980da6a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/602d1305678a8d5fdb372271e980da6a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/602d1305678a8d5fdb372271e980da6a-Reviews.html", "metareview": "", "pdf_size": 4432730, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9543839569432033381&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/602d1305678a8d5fdb372271e980da6a-Abstract.html" }, { "title": "Learning Invariances using the Marginal Likelihood", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11943", "id": "11943", "author_site": "Mark van der Wilk, Matthias Bauer, ST John, James Hensman", "author": "Mark van der Wilk; Matthias Bauer; ST John; James Hensman", "abstract": "In many supervised learning tasks, learning what changes do not affect the prediction target is as crucial to generalisation as learning what does. Data augmentation is a common way to enforce a model to exhibit an invariance: training data is modified according to an invariance designed by a human and added to the training data. We argue that invariances should be incorporated in the model structure, and learned using the marginal likelihood, which can correctly reward the reduced complexity of invariant models. We incorporate invariances in a Gaussian process, due to good marginal likelihood approximations being available for these models. Our main contribution is a derivation of a variational inference scheme for invariant Gaussian processes where the invariance is described by a probability distribution that can be sampled from, much like how data augmentation is implemented in practice.", "bibtex": "@inproceedings{NEURIPS2018_d465f14a,\n author = {van der Wilk, Mark and Bauer, Matthias and John, ST and Hensman, James},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Invariances using the Marginal Likelihood},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d465f14a648b3d0a1faa6f447e526c60-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d465f14a648b3d0a1faa6f447e526c60-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d465f14a648b3d0a1faa6f447e526c60-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d465f14a648b3d0a1faa6f447e526c60-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d465f14a648b3d0a1faa6f447e526c60-Reviews.html", "metareview": "", "pdf_size": 654551, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10307043604548436278&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "PROWLER.io, Cambridge, UK; MPI for Intelligent Systems + University of Cambridge; PROWLER.io, Cambridge, UK; PROWLER.io, Cambridge, UK", "aff_domain": "prowler.io;cam.ac.uk;prowler.io;prowler.io", "email": "prowler.io;cam.ac.uk;prowler.io;prowler.io", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d465f14a648b3d0a1faa6f447e526c60-Abstract.html", "aff_unique_index": "0;1+2;0;0", "aff_unique_norm": "PROWLER.io;Max Planck Institute for Intelligent Systems;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://prowler.io;https://www.mpi-is.mpg.de;https://www.cam.ac.uk", "aff_unique_abbr": ";MPI-IS;Cambridge", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1+0;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Learning Latent Subspaces in Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11623", "id": "11623", "author_site": "Jack Klys, Jake Snell, Richard Zemel", "author": "Jack Klys; Jake Snell; Richard Zemel", "abstract": "Variational autoencoders (VAEs) are widely used deep generative models capable of learning unsupervised latent representations of data. Such representations are often difficult to interpret or control. We consider the problem of unsupervised learning of features correlated to specific labels in a dataset. We propose a VAE-based generative model which we show is capable of extracting features correlated to binary labels in the data and structuring it in a latent subspace which is easy to interpret. Our model, the Conditional Subspace VAE (CSVAE), uses mutual information minimization to learn a low-dimensional latent subspace associated with each label that can easily be inspected and independently manipulated. We demonstrate the utility of the learned representations for attribute manipulation tasks on both the Toronto Face and CelebA datasets.", "bibtex": "@inproceedings{NEURIPS2018_73e5080f,\n author = {Klys, Jack and Snell, Jake and Zemel, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Latent Subspaces in Variational Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/73e5080f0f3804cb9cf470a8ce895dac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/73e5080f0f3804cb9cf470a8ce895dac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/73e5080f0f3804cb9cf470a8ce895dac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/73e5080f0f3804cb9cf470a8ce895dac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/73e5080f0f3804cb9cf470a8ce895dac-Reviews.html", "metareview": "", "pdf_size": 1675898, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1594746329554642510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Toronto; University of Toronto; University of Toronto", "aff_domain": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/73e5080f0f3804cb9cf470a8ce895dac-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Learning Libraries of Subroutines for Neurally\u2013Guided Bayesian Program Induction", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11749", "id": "11749", "author_site": "Kevin Ellis, Lucas Morales, Mathias Sabl\u00e9-Meyer, Armando Solar-Lezama, Josh Tenenbaum", "author": "Kevin Ellis; Lucas Morales; Mathias Sabl\u00e9-Meyer; Armando Solar-Lezama; Josh Tenenbaum", "abstract": "Successful approaches to program induction require a hand-engineered domain-specific language (DSL), constraining the space of allowed programs and imparting prior knowledge of the domain. We contribute a program induction algorithm that learns a DSL while jointly training a neural network to efficiently search for programs in the learned DSL. We use our model to synthesize functions on lists, edit text, and solve symbolic regression problems, showing how the model learns a domain-specific library of program components for expressing solutions to problems in the domain.", "bibtex": "@inproceedings{NEURIPS2018_7aa685b3,\n author = {Ellis, Kevin and Morales, Lucas and Sabl\\'{e}-Meyer, Mathias and Solar-Lezama, Armando and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Libraries of Subroutines for Neurally\\textendash Guided Bayesian Program Induction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7aa685b3b1dc1d6780bf36f7340078c9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7aa685b3b1dc1d6780bf36f7340078c9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7aa685b3b1dc1d6780bf36f7340078c9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7aa685b3b1dc1d6780bf36f7340078c9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7aa685b3b1dc1d6780bf36f7340078c9-Reviews.html", "metareview": "", "pdf_size": 393061, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10177896852320958070&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "MIT; MIT; ENS Paris-Saclay; MIT; MIT", "aff_domain": "mit.edu;mit.edu;mit.edu;csail.mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7aa685b3b1dc1d6780bf36f7340078c9-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;\u00c9cole Normale Sup\u00e9rieure Paris-Saclay", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.ensparis-saclay.fr", "aff_unique_abbr": "MIT;ENS Paris-Saclay", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris-Saclay", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;France" }, { "title": "Learning Loop Invariants for Program Verification", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11744", "id": "11744", "author_site": "Xujie Si, Hanjun Dai, Mukund Raghothaman, Mayur Naik, Le Song", "author": "Xujie Si; Hanjun Dai; Mukund Raghothaman; Mayur Naik; Le Song", "abstract": "A fundamental problem in program verification concerns inferring loop invariants. The problem is undecidable and even practical instances are challenging. Inspired by how human experts construct loop invariants, we propose a reasoning framework Code2Inv that constructs the solution by multi-step decision making and querying an external program graph memory block. By training with reinforcement learning, Code2Inv captures rich program features and avoids the need for ground truth solutions as supervision. Compared to previous learning tasks in domains with graph-structured data, it addresses unique challenges, such as a binary objective function and an extremely sparse reward that is given by an automated theorem prover only after the complete loop invariant is proposed. We evaluate Code2Inv on a suite of 133 benchmark problems and compare it to three state-of-the-art systems. It solves 106 problems compared to 73 by a stochastic search-based system, 77 by a heuristic search-based system, and 100 by a decision tree learning-based system. Moreover, the strategy learned can be generalized to new programs: compared to solving new instances from scratch, the pre-trained agent is more sample efficient in finding solutions.", "bibtex": "@inproceedings{NEURIPS2018_65b1e92c,\n author = {Si, Xujie and Dai, Hanjun and Raghothaman, Mukund and Naik, Mayur and Song, Le},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Loop Invariants for Program Verification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/65b1e92c585fd4c2159d5f33b5030ff2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/65b1e92c585fd4c2159d5f33b5030ff2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/65b1e92c585fd4c2159d5f33b5030ff2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/65b1e92c585fd4c2159d5f33b5030ff2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/65b1e92c585fd4c2159d5f33b5030ff2-Reviews.html", "metareview": "", "pdf_size": 1208958, "gs_citation": 185, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6954633128371638771&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "University of Pennsylvania; Georgia Tech; University of Pennsylvania; University of Pennsylvania; Georgia Tech + Ant Financial", "aff_domain": "cis.upenn.edu;gatech.edu;cis.upenn.edu;cis.upenn.edu;cc.gatech.edu", "email": "cis.upenn.edu;gatech.edu;cis.upenn.edu;cis.upenn.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/65b1e92c585fd4c2159d5f33b5030ff2-Abstract.html", "aff_unique_index": "0;1;0;0;1+2", "aff_unique_norm": "University of Pennsylvania;Georgia Institute of Technology;Ant Financial", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.gatech.edu;https://www.antgroup.com", "aff_unique_abbr": "UPenn;Georgia Tech;Ant Financial", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+1", "aff_country_unique": "United States;China" }, { "title": "Learning Optimal Reserve Price against Non-myopic Bidders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11216", "id": "11216", "author_site": "Jinyan Liu, Zhiyi Huang, Xiangning Wang", "author": "Jinyan Liu; Zhiyi Huang; Xiangning Wang", "abstract": "We consider the problem of learning the optimal reserve price in repeated auctions against non-myopic bidders, who may bid strategically in order to gain in future rounds even if the single-round auctions are truthful. Previous algorithms, e.g., empirical pricing, do not provide non-trivial regret bounds in this setting in general. We introduce algorithms that obtain small regret against non-myopic bidders either when the market is large, i.e., no bidder appears in a constant fraction of the rounds, or when the bidders are impatient, i.e., they discount future utility by some factor mildly bounded away from one. Our approach carefully controls what information is revealed to each bidder, and builds on techniques from differentially private online learning as well as the recent line of works on jointly differentially private algorithms.", "bibtex": "@inproceedings{NEURIPS2018_93d65641,\n author = {Liu, Jinyan and Huang, Zhiyi and Wang, Xiangning},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Optimal Reserve Price against Non-myopic Bidders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/93d65641ff3f1586614cf2c1ad240b6c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/93d65641ff3f1586614cf2c1ad240b6c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/93d65641ff3f1586614cf2c1ad240b6c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/93d65641ff3f1586614cf2c1ad240b6c-Reviews.html", "metareview": "", "pdf_size": 341947, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17381223435832936002&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, The University of Hong Kong; Department of Computer Science, The University of Hong Kong; Department of Computer Science, The University of Hong Kong", "aff_domain": "cs.hku.hk;cs.hku.hk;cs.hku.hk", "email": "cs.hku.hk;cs.hku.hk;cs.hku.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/93d65641ff3f1586614cf2c1ad240b6c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning Others' Intentional Models in Multi-Agent Settings Using Interactive POMDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11549", "id": "11549", "author_site": "Yanlin Han, Piotr Gmytrasiewicz", "author": "Yanlin Han; Piotr Gmytrasiewicz", "abstract": "Interactive partially observable Markov decision processes (I-POMDPs) provide a principled framework for planning and acting in a partially observable, stochastic and multi-agent environment. They extend POMDPs to multi-agent settings by including models of other agents in the state space and forming a hierarchical belief structure. In order to predict other agents' actions using I-POMDPs, we propose an approach that effectively uses Bayesian inference and sequential Monte Carlo sampling to learn others' intentional models which ascribe to them beliefs, preferences and rationality in action selection. Empirical results show that our algorithm accurately learns models of the other agent and outperforms methods that use subintentional models. Our approach serves as a generalized Bayesian learning algorithm that learns other agents' beliefs, strategy levels, and transition, observation and reward functions. It also effectively mitigates the belief space complexity due to the nested belief hierarchy.", "bibtex": "@inproceedings{NEURIPS2018_65fc9fb4,\n author = {Han, Yanlin and Gmytrasiewicz, Piotr},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Others\\textquotesingle Intentional Models in Multi-Agent Settings Using Interactive POMDPs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/65fc9fb4897a89789352e211ca2d398f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/65fc9fb4897a89789352e211ca2d398f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/65fc9fb4897a89789352e211ca2d398f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/65fc9fb4897a89789352e211ca2d398f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/65fc9fb4897a89789352e211ca2d398f-Reviews.html", "metareview": "", "pdf_size": 751070, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5469048292013925859&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Illinois at Chicago; Department of Computer Science, University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu", "email": "uic.edu;uic.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/65fc9fb4897a89789352e211ca2d398f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Overparameterized Neural Networks via Stochastic Gradient Descent on Structured Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11781", "id": "11781", "author_site": "Yuanzhi Li, Yingyu Liang", "author": "Yuanzhi Li; Yingyu Liang", "abstract": "Neural networks have many successful applications, while much less theoretical understanding has been gained. Towards bridging this gap, we study the problem of learning a two-layer overparameterized ReLU neural network for multi-class classification via stochastic gradient descent (SGD) from random initialization. In the overparameterized setting, when the data comes from mixtures of well-separated distributions, we prove that SGD learns a network with a small generalization error, albeit the network has enough capacity to fit arbitrary labels. Furthermore, the analysis provides interesting insights into several aspects of learning neural networks and can be verified based on empirical studies on synthetic data and on the MNIST dataset.", "bibtex": "@inproceedings{NEURIPS2018_54fe976b,\n author = {Li, Yuanzhi and Liang, Yingyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Overparameterized Neural Networks via Stochastic Gradient Descent on Structured Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/54fe976ba170c19ebae453679b362263-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/54fe976ba170c19ebae453679b362263-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/54fe976ba170c19ebae453679b362263-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/54fe976ba170c19ebae453679b362263-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/54fe976ba170c19ebae453679b362263-Reviews.html", "metareview": "", "pdf_size": 573419, "gs_citation": 787, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2127738951767879649&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Computer Science Department, Stanford University; Department of Computer Sciences, University of Wisconsin-Madison", "aff_domain": "stanford.edu;cs.wisc.edu", "email": "stanford.edu;cs.wisc.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/54fe976ba170c19ebae453679b362263-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Stanford University;University of Wisconsin-Madison", "aff_unique_dep": "Computer Science Department;Department of Computer Sciences", "aff_unique_url": "https://www.stanford.edu;https://www.wisc.edu", "aff_unique_abbr": "Stanford;UW-Madison", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Pipelines with Limited Data and Domain Knowledge: A Study in Parsing Physics Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11041", "id": "11041", "author_site": "Mrinmaya Sachan, Kumar Avinava Dubey, Tom Mitchell, Dan Roth, Eric Xing", "author": "Mrinmaya Sachan; Kumar Avinava Dubey; Tom M. Mitchell; Dan Roth; Eric P Xing", "abstract": "As machine learning becomes more widely used in practice, we need new methods to build complex intelligent systems that integrate learning with existing software, and with domain knowledge encoded as rules. As a case study, we present such a system that learns to parse Newtonian physics problems in textbooks. This system, Nuts&Bolts, learns a pipeline process that incorporates existing code, pre-learned machine learning models, and human engineered rules. It jointly trains the entire pipeline to prevent propagation of errors, using a combination of labelled and unlabelled data. Our approach achieves a good performance on the parsing task, outperforming the simple pipeline and its variants. Finally, we also show how Nuts&Bolts can be used to achieve improvements on a relation extraction task and on the end task of answering Newtonian physics problems.", "bibtex": "@inproceedings{NEURIPS2018_ac627ab1,\n author = {Sachan, Mrinmaya and Dubey, Kumar Avinava and Mitchell, Tom M and Roth, Dan and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Pipelines with Limited Data and Domain Knowledge: A Study in Parsing Physics Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ac627ab1ccbdb62ec96e702f07f6425b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ac627ab1ccbdb62ec96e702f07f6425b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ac627ab1ccbdb62ec96e702f07f6425b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ac627ab1ccbdb62ec96e702f07f6425b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ac627ab1ccbdb62ec96e702f07f6425b-Reviews.html", "metareview": "", "pdf_size": 611711, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=143598108710342819&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Machine Learning Department, School of Computer Science, Carnegie Mellon University; Machine Learning Department, School of Computer Science, Carnegie Mellon University; Machine Learning Department, School of Computer Science, Carnegie Mellon University; Department of Computer and Information Science, University of Pennsylvania; Machine Learning Department, School of Computer Science, Carnegie Mellon University + Petuum Inc.", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;seas.upenn.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;seas.upenn.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ac627ab1ccbdb62ec96e702f07f6425b-Abstract.html", "aff_unique_index": "0;0;0;1;0+2", "aff_unique_norm": "Carnegie Mellon University;University of Pennsylvania;Petuum Inc.", "aff_unique_dep": "Machine Learning Department;Department of Computer and Information Science;", "aff_unique_url": "https://www.cmu.edu;https://www.upenn.edu;https://www.petuum.com", "aff_unique_abbr": "CMU;UPenn;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Learning Plannable Representations with Causal InfoGAN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11834", "id": "11834", "author_site": "Thanard Kurutach, Aviv Tamar, Ge Yang, Stuart Russell, Pieter Abbeel", "author": "Thanard Kurutach; Aviv Tamar; Ge Yang; Stuart Russell; Pieter Abbeel", "abstract": "In recent years, deep generative models have been shown to 'imagine' convincing high-dimensional observations such as images, audio, and even video, learning directly from raw data. In this work, we ask how to imagine goal-directed visual plans -- a plausible sequence of observations that transition a dynamical system from its current configuration to a desired goal state, which can later be used as a reference trajectory for control. We focus on systems with high-dimensional observations, such as images, and propose an approach that naturally combines representation learning and planning. Our framework learns a generative model of sequential observations, where the generative process is induced by a transition in a low-dimensional planning model, and an additional noise. By maximizing the mutual information between the generated observations and the transition in the planning model, we obtain a low-dimensional representation that best explains the causal nature of the data. 
We structure the planning model to be compatible with efficient planning algorithms, and we propose several such models based on either discrete or continuous states. Finally, to generate a visual plan, we project the current and goal observations onto their respective states in the planning model, plan a trajectory, and then use the generative model to transform the trajectory to a sequence of observations. We demonstrate our method on imagining plausible visual plans of rope manipulation.", "bibtex": "@inproceedings{NEURIPS2018_08aac6ac,\n author = {Kurutach, Thanard and Tamar, Aviv and Yang, Ge and Russell, Stuart J and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Plannable Representations with Causal InfoGAN},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08aac6ac98e59e523995c161e57875f5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08aac6ac98e59e523995c161e57875f5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/08aac6ac98e59e523995c161e57875f5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08aac6ac98e59e523995c161e57875f5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08aac6ac98e59e523995c161e57875f5-Reviews.html", "metareview": "", "pdf_size": 1911838, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11334480747970611889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Berkeley AI Research, University of California, Berkeley; Berkeley AI Research, University of California, Berkeley; Department of Physics, University of Chicago; Berkeley AI Research, University of California, Berkeley; Berkeley AI Research, University of California, Berkeley", "aff_domain": ";;;;", "email": ";;;;", "github": "http://github.com/thanard/causal-infogan", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08aac6ac98e59e523995c161e57875f5-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;University of Chicago", "aff_unique_dep": "Berkeley AI Research;Department of Physics", "aff_unique_url": "https://www.berkeley.edu;https://www.uchicago.edu", "aff_unique_abbr": "UC Berkeley;UChicago", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning SMaLL Predictors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11870", "id": "11870", "author_site": "Vikas Garg, Ofer Dekel, Lin Xiao", "author": "Vikas Garg; Ofer Dekel; Lin Xiao", "abstract": "We introduce a new framework for learning in severely resource-constrained settings. Our technique delicately amalgamates the representational richness of multiple linear predictors with the sparsity of Boolean relaxations, and thereby yields classifiers that are compact, interpretable, and accurate. We provide a rigorous formalism of the learning problem, and establish fast convergence of the ensuing algorithm via relaxation to a minimax saddle point objective. 
We supplement the theoretical foundations of our work with an extensive empirical evaluation.", "bibtex": "@inproceedings{NEURIPS2018_03b2ceb7,\n author = {Garg, Vikas and Dekel, Ofer and Xiao, Lin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning SMaLL Predictors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/03b2ceb73723f8b53cd533e4fba898ee-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/03b2ceb73723f8b53cd533e4fba898ee-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/03b2ceb73723f8b53cd533e4fba898ee-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/03b2ceb73723f8b53cd533e4fba898ee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/03b2ceb73723f8b53cd533e4fba898ee-Reviews.html", "metareview": "", "pdf_size": 871634, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9463459123557907907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "CSAIL, MIT; Microsoft Research; Microsoft Research", "aff_domain": "csail.mit.edu;microsoft.com;microsoft.com", "email": "csail.mit.edu;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/03b2ceb73723f8b53cd533e4fba898ee-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;Microsoft Research", "aff_unique_url": "https://www.csail.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MIT;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Safe Policies with Expert Guidance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11868", "id": "11868", "author_site": "Jessie Huang, Fa Wu, Doina Precup, Yang Cai", "author": "Jessie Huang; Fa Wu; Doina Precup; Yang Cai", "abstract": "We propose a framework for ensuring safe behavior of a reinforcement learning agent when the reward function may be difficult to specify. In order to do this, we rely on the existence of demonstrations from expert policies, and we provide a theoretical framework for the agent to optimize in the space of rewards consistent with its existing knowledge. We propose two methods to solve the resulting optimization: an exact ellipsoid-based method and a method in the spirit of the \"follow-the-perturbed-leader\" algorithm. Our experiments demonstrate the behavior of our algorithm in both discrete and continuous problems. The trained agent safely avoids states with potential negative effects while imitating the behavior of the expert in the other states.", "bibtex": "@inproceedings{NEURIPS2018_a89b71bb,\n author = {Huang, Jessie and Wu, Fa and Precup, Doina and Cai, Yang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Safe Policies with Expert Guidance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a89b71bb5227c75d463dd82a03115738-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a89b71bb5227c75d463dd82a03115738-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a89b71bb5227c75d463dd82a03115738-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a89b71bb5227c75d463dd82a03115738-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a89b71bb5227c75d463dd82a03115738-Reviews.html", "metareview": "", "pdf_size": 706478, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15845083224460981643&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "School of Computer Science, McGill University; School of Computer Science, McGill University + Zhejiang Demetics Medical Technology; School of Computer Science, McGill University; School of Computer Science, McGill University", "aff_domain": "mcgill.ca;mcgill.ca;cs.mcgill.ca;cs.mcgill.ca", "email": "mcgill.ca;mcgill.ca;cs.mcgill.ca;cs.mcgill.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a89b71bb5227c75d463dd82a03115738-Abstract.html", "aff_unique_index": "0;0+1;0;0", "aff_unique_norm": "McGill University;Zhejiang Demetics Medical Technology", "aff_unique_dep": "School of Computer Science;", "aff_unique_url": "https://www.mcgill.ca;", "aff_unique_abbr": "McGill;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0+1;0;0", "aff_country_unique": "Canada;China" }, { "title": "Learning Signed Determinantal Point Processes through the Principal Minor Assignment Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11709", "id": "11709", "author": "Victor-Emmanuel Brunel", "abstract": "Symmetric determinantal point processes (DPP) are a class of probabilistic models that encode the random selection of items that have a repulsive behavior. They have attracted a lot of attention in machine learning, where returning diverse sets of items is sought for. Sampling and learning these symmetric DPP's is pretty well understood. In this work, we consider a new class of DPP's, which we call signed DPP's, where we break the symmetry and allow attractive behaviors. We set the ground for learning signed DPP's through a method of moments, by solving the so called principal assignment problem for a class of matrices $K$ that satisfy $K_{i,j}=\\pm K_{j,i}$, $i\\neq j$, in polynomial time.", "bibtex": "@inproceedings{NEURIPS2018_e1228be4,\n author = {Brunel, Victor-Emmanuel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Signed Determinantal Point Processes through the Principal Minor Assignment Problem},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e1228be46de6a0234ac22ded31417bc7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e1228be46de6a0234ac22ded31417bc7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e1228be46de6a0234ac22ded31417bc7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e1228be46de6a0234ac22ded31417bc7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e1228be46de6a0234ac22ded31417bc7-Reviews.html", "metareview": "", "pdf_size": 340824, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11728969756427373440&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e1228be46de6a0234ac22ded31417bc7-Abstract.html" }, { "title": "Learning Task Specifications from Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11524", "id": "11524", "author_site": "Marcell Vazquez-Chanlatte, Susmit Jha, Ashish Tiwari, Mark Ho, Sanjit Seshia", "author": "Marcell Vazquez-Chanlatte; Susmit Jha; Ashish Tiwari; Mark K Ho; Sanjit Seshia", "abstract": "Real-world applications often naturally decompose into several sub-tasks. In many settings (e.g., robotics) demonstrations provide a natural way to specify the sub-tasks. However, most methods for learning from demonstrations either do not provide guarantees that the artifacts learned for the sub-tasks can be safely recombined or limit the types of composition available. Motivated by this deficit, we consider the problem of inferring Boolean non-Markovian rewards (also known as logical trace properties or specifications) from demonstrations provided by an agent operating in an uncertain, stochastic environment. Crucially, specifications admit well-defined composition rules that are typically easy to interpret. In this paper, we formulate the specification inference task as a maximum a posteriori (MAP) probability inference problem, apply the principle of maximum entropy to derive an analytic demonstration likelihood model and give an efficient approach to search for the most likely specification in a large candidate pool of specifications. In our experiments, we demonstrate how learning specifications can help avoid common problems that often arise due to ad-hoc reward composition.", "bibtex": "@inproceedings{NEURIPS2018_74934548,\n author = {Vazquez-Chanlatte, Marcell and Jha, Susmit and Tiwari, Ashish and Ho, Mark K and Seshia, Sanjit},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Task Specifications from Demonstrations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/74934548253bcab8490ebd74afed7031-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/74934548253bcab8490ebd74afed7031-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/74934548253bcab8490ebd74afed7031-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/74934548253bcab8490ebd74afed7031-Reviews.html", "metareview": "", "pdf_size": 706959, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5595840445369553650&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "University of California, Berkeley; SRI International, Menlo Park; SRI International, Menlo Park; University of California, Berkeley; University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;sri.com;sri.com;eecs.berkeley.edu;eecs.berkeley.edu", "email": "eecs.berkeley.edu;sri.com;sri.com;eecs.berkeley.edu;eecs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/74934548253bcab8490ebd74afed7031-Abstract.html", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of California, Berkeley;SRI International", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.sri.com", "aff_unique_abbr": "UC Berkeley;SRI", "aff_campus_unique_index": "0;1;1;0;0", "aff_campus_unique": "Berkeley;Menlo Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Temporal Point Processes via Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12019", "id": "12019", "author_site": "Shuang Li, Shuai Xiao, Shixiang Zhu, Nan Du, Yao Xie, Le Song", "author": "Shuang Li; Shuai Xiao; Shixiang Zhu; Nan Du; Yao Xie; Le Song", "abstract": "Social goods, such as healthcare, smart city, and information networks, often produce ordered event data in continuous time. The generative processes of these event data can be very complex, requiring flexible models to capture their dynamics. Temporal point processes offer an elegant framework for modeling event data without discretizing the time. However, the existing maximum-likelihood-estimation (MLE) learning paradigm requires hand-crafting the intensity function beforehand and cannot directly monitor the goodness-of-fit of the estimated model in the process of training. To alleviate the risk of model-misspecification in MLE, we propose to generate samples from the generative model and monitor the quality of the samples in the process of training until the samples and the real data are indistinguishable. We take inspiration from reinforcement learning (RL) and treat the generation of each event as the action taken by a stochastic policy. We parameterize the policy as a flexible recurrent neural network and gradually improve the policy to mimic the observed event distribution. Since the reward function is unknown in this setting, we uncover an analytic and nonparametric form of the reward function using an inverse reinforcement learning formulation. 
This new RL framework allows us to derive an efficient policy gradient algorithm for learning flexible point process models, and we show that it performs well on both synthetic and real data.", "bibtex": "@inproceedings{NEURIPS2018_5d50d227,\n author = {Li, Shuang and Xiao, Shuai and Zhu, Shixiang and Du, Nan and Xie, Yao and Song, Le},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Temporal Point Processes via Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5d50d22735a7469266aab23fd8aeb536-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5d50d22735a7469266aab23fd8aeb536-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5d50d22735a7469266aab23fd8aeb536-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5d50d22735a7469266aab23fd8aeb536-Reviews.html", "metareview": "", "pdf_size": 2079473, "gs_citation": 134, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16855002767181200211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Georgia Institute of Technology; Ant Financial; Georgia Institute of Technology; Google Brain; Georgia Institute of Technology; Georgia Institute of Technology+Ant Financial", "aff_domain": "gatech.edu; ; ; ;isye.gatech.edu;cc.gatech.edu", "email": "gatech.edu; ; ; ;isye.gatech.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5d50d22735a7469266aab23fd8aeb536-Abstract.html", "aff_unique_index": "0;1;0;2;0;0+1", "aff_unique_norm": "Georgia Institute of Technology;Ant Financial;Google", "aff_unique_dep": ";;Google Brain", "aff_unique_url": "https://www.gatech.edu;https://www.antgroup.com;https://brain.google.com", "aff_unique_abbr": "Georgia Tech;Ant Financial;Google Brain", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0;0;0+1", "aff_country_unique": "United States;China" }, { "title": "Learning To Learn Around A Common Mean", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11964", "id": "11964", "author_site": "Giulia Denevi, Carlo Ciliberto, Dimitris Stamos, Massimiliano Pontil", "author": "Giulia Denevi; Carlo Ciliberto; Dimitris Stamos; Massimiliano Pontil", "abstract": "The problem of learning-to-learn (LTL) or meta-learning is gaining increasing attention due to recent empirical evidence of its effectiveness in applications. The goal addressed in LTL is to select an algorithm that works well on tasks sampled from a meta-distribution. In this work, we consider the family of algorithms given by a variant of Ridge Regression, in which the regularizer is the square distance to an unknown mean vector. We show that, in this setting, the LTL problem can be reformulated as a Least Squares (LS) problem and we exploit a novel meta-algorithm to efficiently solve it. At each iteration the meta-algorithm processes only one dataset. Specifically, it first estimates the stochastic LS objective function by splitting this dataset into two subsets used to train and test the inner algorithm, respectively. Second, it performs a stochastic gradient step with the estimated value. 
Under specific assumptions, we present a bound for the generalization error of our meta-algorithm, which suggests the right splitting parameter to choose. When the hyper-parameters of the problem are fixed, this bound is consistent as the number of tasks grows, even if the sample size is kept constant. Preliminary experiments confirm our theoretical findings, highlighting the advantage of our approach with respect to independent task learning.", "bibtex": "@inproceedings{NEURIPS2018_b9a25e42,\n author = {Denevi, Giulia and Ciliberto, Carlo and Stamos, Dimitris and Pontil, Massimiliano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning To Learn Around A Common Mean},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b9a25e422ba96f7572089a00b838c3f8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b9a25e422ba96f7572089a00b838c3f8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b9a25e422ba96f7572089a00b838c3f8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b9a25e422ba96f7572089a00b838c3f8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b9a25e422ba96f7572089a00b838c3f8-Reviews.html", "metareview": "", "pdf_size": 863793, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7708044257637298434&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b9a25e422ba96f7572089a00b838c3f8-Abstract.html" }, { "title": "Learning Versatile Filters for Efficient Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11175", "id": "11175", "author_site": "Yunhe Wang, Chang Xu, Chunjing XU, Chao Xu, Dacheng Tao", "author": "Yunhe Wang; Chang Xu; Chunjing XU; Chao Xu; Dacheng Tao", "abstract": "This paper introduces versatile filters to construct efficient convolutional neural networks. Considering the demands of efficient deep learning techniques running on cost-effective hardware, a number of methods have been developed to learn compact neural networks. Most of these works aim to slim down filters in different ways, e.g., investigating small, sparse or binarized filters. In contrast, we treat filters from an additive perspective. A series of secondary filters can be derived from a primary filter. These secondary filters are all inherent in the primary filter and occupy no additional storage, but once unfolded in computation they can significantly enhance the capability of the filter by integrating information extracted from different receptive fields. Besides spatial versatile filters, we additionally investigate versatile filters from the channel perspective. The new techniques are general and can upgrade filters in existing CNNs. 
Experimental results on benchmark datasets and neural networks demonstrate that CNNs constructed with our versatile filters achieve accuracy comparable to that of the original filters, while requiring less memory and fewer FLOPs.", "bibtex": "@inproceedings{NEURIPS2018_f0adc883,\n author = {Wang, Yunhe and Xu, Chang and XU, Chunjing and Xu, Chao and Tao, Dacheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Versatile Filters for Efficient Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f0adc8838f4bdedde4ec2cfad0515589-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f0adc8838f4bdedde4ec2cfad0515589-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f0adc8838f4bdedde4ec2cfad0515589-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f0adc8838f4bdedde4ec2cfad0515589-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f0adc8838f4bdedde4ec2cfad0515589-Reviews.html", "metareview": "", "pdf_size": 309433, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14079725458323492359&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Huawei Noah\u2019s Ark Lab; UBTECH Sydney AI Centre, SIT, FEIT, University of Sydney, Australia; Huawei Noah\u2019s Ark Lab; Key Lab of Machine Perception (MOE), Cooperative Medianet Innovation Center, School of EECS, Peking University, Beijing, China; UBTECH Sydney AI Centre, SIT, FEIT, University of Sydney, Australia", "aff_domain": "huawei.com;sydney.edu.au;huawei.com;cis.pku.edu.cn;sydney.edu.au", "email": "huawei.com;sydney.edu.au;huawei.com;cis.pku.edu.cn;sydney.edu.au", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f0adc8838f4bdedde4ec2cfad0515589-Abstract.html", "aff_unique_index": "0;1;0;2;1", "aff_unique_norm": "Huawei;University of Sydney;Peking University", "aff_unique_dep": "Noah\u2019s Ark Lab;Sydney AI Centre;School of EECS", "aff_unique_url": "https://www.huawei.com;https://www.sydney.edu.au;http://www.pku.edu.cn", "aff_unique_abbr": "Huawei;USYD;PKU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Sydney;Beijing", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;Australia" }, { "title": "Learning a High Fidelity Pose Invariant Model for High-resolution Face Frontalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11293", "id": "11293", "author_site": "Jie Cao, Yibo Hu, Hongwen Zhang, Ran He, Zhenan Sun", "author": "Jie Cao; Yibo Hu; Hongwen Zhang; Ran He; Zhenan Sun", "abstract": "Face frontalization refers to the process of synthesizing the frontal view of a face from a given profile. Due to self-occlusion and appearance distortion in the wild, it is extremely challenging to recover faithful results and preserve texture details at high resolution. This paper proposes a High Fidelity Pose Invariant Model (HF-PIM) to produce photographic and identity-preserving results. HF-PIM frontalizes the profiles through a novel texture warping procedure and leverages a dense correspondence field to bind the 2D and 3D surface spaces. 
We decompose the prerequisite of warping into dense correspondence field estimation and facial texture map recovery, which are both well addressed by deep networks. Unlike reconstruction methods that rely on 3D data, we also propose Adversarial Residual Dictionary Learning (ARDL) to supervise facial texture map recovery with only monocular images. Exhaustive experiments in both controlled and uncontrolled environments demonstrate that the proposed method not only boosts the performance of pose-invariant face recognition but also dramatically improves high-resolution frontalization appearances.", "bibtex": "@inproceedings{NEURIPS2018_1415db70,\n author = {Cao, Jie and Hu, Yibo and Zhang, Hongwen and He, Ran and Sun, Zhenan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning a High Fidelity Pose Invariant Model for High-resolution Face Frontalization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1415db70fe9ddb119e23e9b2808cde38-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1415db70fe9ddb119e23e9b2808cde38-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1415db70fe9ddb119e23e9b2808cde38-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1415db70fe9ddb119e23e9b2808cde38-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1415db70fe9ddb119e23e9b2808cde38-Reviews.html", "metareview": "", "pdf_size": 3815892, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1308772940780723982&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "National Laboratory of Pattern Recognition, CASIA; National Laboratory of Pattern Recognition, CASIA; National Laboratory of Pattern Recognition, CASIA; National Laboratory of Pattern Recognition, CASIA+Center for Research on Intelligent Perception and Computing, CASIA+Center for Excellence in Brain Science and Intelligence Technology, CASIA+University of Chinese Academy of Sciences, Beijing, 100049, China; National Laboratory of Pattern Recognition, CASIA+Center for Research on Intelligent Perception and Computing, CASIA+Center for Excellence in Brain Science and Intelligence Technology, CASIA+University of Chinese Academy of Sciences, Beijing, 100049, China", "aff_domain": "cripac.ia.ac.cn;cripac.ia.ac.cn;cripac.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "email": "cripac.ia.ac.cn;cripac.ia.ac.cn;cripac.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1415db70fe9ddb119e23e9b2808cde38-Abstract.html", "aff_unique_index": "0;0;0;0+1+1+2;0+1+1+2", "aff_unique_norm": "Chinese Academy of Sciences, Institute of Automation;Chinese Academy of Sciences Institute of Automation;University of Chinese Academy of Sciences", "aff_unique_dep": "National Laboratory of Pattern Recognition;Center for Research on Intelligent Perception and Computing;", "aff_unique_url": "http://www.ia.cas.cn;http://www.casia.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CASIA;CASIA;UCAS", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0+0+0+0;0+0+0+0", "aff_country_unique": "China" }, { "title": "Learning a Warping Distance from Unlabeled Time 
Series Using Sequence Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11997", "id": "11997", "author_site": "Abubakar Abid, James Zou", "author": "Abubakar Abid; James Y Zou", "abstract": "Measuring similarities between unlabeled time series trajectories is an important problem in many domains such as medicine, economics, and vision. It is often unclear which metric is appropriate to use because of the complex nature of noise in the trajectories (e.g. different sampling rates or outliers). Experts typically hand-craft or manually select a specific metric, such as Dynamic Time Warping (DTW), to apply to their data. In this paper, we propose an end-to-end framework, autowarp, that optimizes and learns a good metric given unlabeled trajectories. We define a flexible and differentiable family of warping metrics, which encompasses common metrics such as DTW, Edit Distance, Euclidean, etc. Autowarp then leverages the representation power of sequence autoencoders to optimize for a member of this warping family. The output is a metric which is easy to interpret and can be robustly learned from relatively few trajectories. In systematic experiments across different domains, we show that autowarp often outperforms hand-crafted trajectory similarity metrics.", "bibtex": "@inproceedings{NEURIPS2018_1a9dcba2,\n author = {Abid, Abubakar and Zou, James Y},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning a Warping Distance from Unlabeled Time Series Using Sequence Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1a9dcba2349fef2bb823c39e45dd6c96-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1a9dcba2349fef2bb823c39e45dd6c96-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1a9dcba2349fef2bb823c39e45dd6c96-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1a9dcba2349fef2bb823c39e45dd6c96-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1a9dcba2349fef2bb823c39e45dd6c96-Reviews.html", "metareview": "", "pdf_size": 1223883, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5248278568310725538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff": "Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1a9dcba2349fef2bb823c39e45dd6c96-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning a latent manifold of odor representations from neural responses in piriform cortex", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11525", "id": "11525", "author_site": "Anqi Wu, Stan Pashkovski, Sandeep Datta, Jonathan Pillow", "author": "Anqi Wu; Stan Pashkovski; Sandeep R Datta; Jonathan W Pillow", "abstract": "A major difficulty in studying the neural mechanisms 
underlying olfactory perception is the lack of obvious structure in the relationship between odorants and the neural activity patterns they elicit. Here we use odor-evoked responses in piriform cortex to identify a latent manifold specifying latent distance relationships between olfactory stimuli. Our approach is based on the Gaussian process latent variable model, and seeks to map odorants to points in a low-dimensional embedding space, where distances between points in the embedding space relate to the similarity of population responses they elicit. The model is specified by an explicit continuous mapping from a latent embedding space to the space of high-dimensional neural population firing rates via nonlinear tuning curves, each parametrized by a Gaussian process. Population responses are then generated by the addition of correlated, odor-dependent Gaussian noise. We fit this model to large-scale calcium fluorescence imaging measurements of population activity in layers 2 and 3 of mouse piriform cortex following the presentation of a diverse set of odorants. The model identifies a low-dimensional embedding of each odor, and a smooth tuning curve over the latent embedding space that accurately captures each neuron's response to different odorants. The model captures both signal and noise correlations across more than 500 neurons. We validate the model using a cross-validation analysis known as co-smoothing to show that the model can accurately predict the responses of a population of held-out neurons to test odorants.", "bibtex": "@inproceedings{NEURIPS2018_17b3c706,\n author = {Wu, Anqi and Pashkovski, Stan and Datta, Sandeep R and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning a latent manifold of odor representations from neural responses in piriform cortex},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/17b3c7061788dbe82de5abe9f6fe22b3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/17b3c7061788dbe82de5abe9f6fe22b3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/17b3c7061788dbe82de5abe9f6fe22b3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/17b3c7061788dbe82de5abe9f6fe22b3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/17b3c7061788dbe82de5abe9f6fe22b3-Reviews.html", "metareview": "", "pdf_size": 998822, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14494715644329326759&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Princeton Neuroscience Institute, Princeton University; Department of Neurobiology, Harvard Medical School; Department of Neurobiology, Harvard Medical School; Princeton Neuroscience Institute, Princeton University", "aff_domain": "princeton.edu;hms.harvard.edu;hms.harvard.edu;princeton.edu", "email": "princeton.edu;hms.harvard.edu;hms.harvard.edu;princeton.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/17b3c7061788dbe82de5abe9f6fe22b3-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Princeton University;Harvard Medical School", "aff_unique_dep": "Princeton Neuroscience Institute;Department of Neurobiology", "aff_unique_url": "https://www.princeton.edu;https://hms.harvard.edu", "aff_unique_abbr": "Princeton;HMS", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Princeton;Boston", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning and Inference in Hilbert Space with Quantum Graphical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11978", "id": "11978", "author_site": "Siddarth Srinivasan, Carlton Downey, Byron Boots", "author": "Siddarth Srinivasan; Carlton Downey; Byron Boots", "abstract": "Quantum Graphical Models (QGMs) generalize classical graphical models by adopting the formalism for reasoning about uncertainty from quantum mechanics. Unlike classical graphical models, QGMs represent uncertainty with density matrices in complex Hilbert spaces. Hilbert space embeddings (HSEs) also generalize Bayesian inference in Hilbert spaces. We investigate the link between QGMs and HSEs and show that the sum rule and Bayes rule for QGMs are equivalent to the kernel sum rule in HSEs and a special case of Nadaraya-Watson kernel regression, respectively. We show that these operations can be kernelized, and use these insights to propose a Hilbert Space Embedding of Hidden Quantum Markov Models (HSE-HQMM) to model dynamics. We present experimental results showing that HSE-HQMMs are competitive with state-of-the-art models like LSTMs and PSRNNs on several datasets, while also providing a nonparametric method for maintaining a probability distribution over continuous-valued features.", "bibtex": "@inproceedings{NEURIPS2018_11704817,\n author = {Srinivasan, Siddarth and Downey, Carlton and Boots, Byron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning and Inference in Hilbert Space with Quantum Graphical Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/11704817e347269b7254e744b5e22dac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/11704817e347269b7254e744b5e22dac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/11704817e347269b7254e744b5e22dac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/11704817e347269b7254e744b5e22dac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/11704817e347269b7254e744b5e22dac-Reviews.html", "metareview": "", "pdf_size": 1041302, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4971115408197607156&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "College of Computing, Georgia Tech, Atlanta, GA 30332; Department of Machine Learning, Carnegie Mellon University, Pittsburgh, PA 15213; College of Computing, Georgia Tech, Atlanta, GA 30332", "aff_domain": "gatech.edu;cs.cmu.edu;cc.gatech.edu", "email": "gatech.edu;cs.cmu.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/11704817e347269b7254e744b5e22dac-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Carnegie Mellon University", "aff_unique_dep": "College of Computing;Department of Machine Learning", "aff_unique_url": "https://www.gatech.edu;https://www.cmu.edu", "aff_unique_abbr": "Georgia Tech;CMU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Atlanta;Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning and Testing Causal Models with Interventions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11899", "id": "11899", "author_site": "Jayadev Acharya, Arnab Bhattacharyya, Constantinos Daskalakis, Saravanan Kandasamy", "author": "Jayadev Acharya; Arnab Bhattacharyya; Constantinos Daskalakis; Saravanan Kandasamy", "abstract": "We consider testing and learning problems on causal Bayesian networks as defined by Pearl (Pearl, 2009). Given a causal Bayesian network M on a graph with n discrete variables and bounded in-degree and bounded ``confounded components'', we show that O(log n) interventions on an unknown causal Bayesian network X on the same graph, and O(n/epsilon^2) samples per intervention, suffice to efficiently distinguish whether X=M or whether there exists some intervention under which X and M are farther than epsilon in total variation distance. We also obtain sample/time/intervention efficient algorithms for: (i) testing the identity of two unknown causal Bayesian networks on the same graph; and (ii) learning a causal Bayesian network on a given graph. Although our algorithms are non-adaptive, we show that adaptivity does not help in general: Omega(log n) interventions are necessary for testing the identity of two unknown causal Bayesian networks on the same graph, even adaptively. 
Our algorithms are enabled by a new subadditivity inequality for the squared Hellinger distance between two causal Bayesian networks.", "bibtex": "@inproceedings{NEURIPS2018_78631a4b,\n author = {Acharya, Jayadev and Bhattacharyya, Arnab and Daskalakis, Constantinos and Kandasamy, Saravanan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning and Testing Causal Models with Interventions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/78631a4bb5303be54fa1cfdcb958c00a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/78631a4bb5303be54fa1cfdcb958c00a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/78631a4bb5303be54fa1cfdcb958c00a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/78631a4bb5303be54fa1cfdcb958c00a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/78631a4bb5303be54fa1cfdcb958c00a-Reviews.html", "metareview": "", "pdf_size": 399946, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2427113881547981133&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "School of ECE, Cornell University; National University of Singapore + Indian Institute of Science; EECS, MIT; STCS, Tata Institute of Fundamental Research", "aff_domain": "cornell.edu;iisc.ac.in;csail.mit.edu;gmail.com", "email": "cornell.edu;iisc.ac.in;csail.mit.edu;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/78631a4bb5303be54fa1cfdcb958c00a-Abstract.html", "aff_unique_index": "0;1+2;3;4", "aff_unique_norm": "Cornell University;National University of Singapore;Indian Institute of Science;Massachusetts Institute of Technology;Tata Institute of Fundamental Research", "aff_unique_dep": "School of Electrical and Computer Engineering;;;Electrical Engineering and Computer Science;School of Technology and Computer Science", "aff_unique_url": "https://www.cornell.edu;https://www.nus.edu.sg;https://www.iisc.ac.in;https://www.mit.edu;https://www.tifr.res.in", "aff_unique_abbr": "Cornell;NUS;IISc;MIT;TIFR", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1+2;0;2", "aff_country_unique": "United States;Singapore;India" }, { "title": "Learning convex bounds for linear quadratic control policy synthesis", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11909", "id": "11909", "author_site": "Jack Umenberger, Thomas Sch\u00f6n", "author": "Jack Umenberger; Thomas B Sch\u00f6n", "abstract": "Learning to make decisions from observed data in dynamic environments remains a problem of fundamental importance in a number of fields, from artificial intelligence and robotics, to medicine and finance.\nThis paper concerns the problem of learning control policies for unknown linear dynamical systems so as to maximize a quadratic reward function.\nWe present a method to optimize the expected value of the reward over the posterior distribution of the unknown system parameters, given data.\nThe algorithm involves sequential convex programming, and enjoys reliable local convergence and robust stability guarantees.\nNumerical simulations and stabilization of a real-world inverted pendulum are used to 
demonstrate the approach, with strong performance and robustness properties observed in both.", "bibtex": "@inproceedings{NEURIPS2018_f610a13d,\n author = {Umenberger, Jack and Sch\\\"{o}n, Thomas B},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning convex bounds for linear quadratic control policy synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f610a13de080fb8df6cf972fc01ad93f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f610a13de080fb8df6cf972fc01ad93f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f610a13de080fb8df6cf972fc01ad93f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f610a13de080fb8df6cf972fc01ad93f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f610a13de080fb8df6cf972fc01ad93f-Reviews.html", "metareview": "", "pdf_size": 688014, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Department of Information Technology, Uppsala University, Sweden; Department of Information Technology, Uppsala University, Sweden", "aff_domain": "it.uu.se;it.uu.se", "email": "it.uu.se;it.uu.se", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f610a13de080fb8df6cf972fc01ad93f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "Department of Information Technology", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "title": "Learning convex polytopes with margin", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11556", "id": "11556", "author_site": "Lee-Ad Gottlieb, Eran Kaufman, Aryeh Kontorovich, Gabriel Nivasch", "author": "Lee-Ad Gottlieb; Eran Kaufman; Aryeh Kontorovich; Gabriel Nivasch", "abstract": "We present an improved algorithm for properly learning convex polytopes in the\nrealizable PAC setting from data with a margin. Our learning algorithm constructs\na consistent polytope as an intersection of about t log t halfspaces with margins\nin time polynomial in t (where t is the number of halfspaces forming an optimal\npolytope).\nWe also identify distinct generalizations of the notion of margin from hyperplanes\nto polytopes and investigate how they relate geometrically; this result may be of\ninterest beyond the learning setting.", "bibtex": "@inproceedings{NEURIPS2018_22b1f2e0,\n author = {Gottlieb, Lee-Ad and Kaufman, Eran and Kontorovich, Aryeh and Nivasch, Gabriel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning convex polytopes with margin},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/22b1f2e0983160db6f7bb9f62f4dbb39-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/22b1f2e0983160db6f7bb9f62f4dbb39-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/22b1f2e0983160db6f7bb9f62f4dbb39-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/22b1f2e0983160db6f7bb9f62f4dbb39-Reviews.html", "metareview": "", "pdf_size": 337869, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1274475054154096306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Ariel University; Ariel University; Ben-Gurion University; Ariel University", "aff_domain": "ariel.ac.il;gmail.com;bgu.sc.il;ariel.ac.il", "email": "ariel.ac.il;gmail.com;bgu.sc.il;ariel.ac.il", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/22b1f2e0983160db6f7bb9f62f4dbb39-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Ariel University;Ben-Gurion University of the Negev", "aff_unique_dep": ";", "aff_unique_url": "https://www.ariel.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "Ariel U;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "title": "Learning filter widths of spectral decompositions with wavelets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11453", "id": "11453", "author_site": "Haidar Khan, Bulent Yener", "author": "Haidar Khan; Bulent Yener", "abstract": "Time series classification using deep neural networks, such as convolutional neural networks (CNN), operates on the spectral decomposition of the time series computed using a preprocessing step. This step can include a large number of hyperparameters, such as window length, filter widths, and filter shapes, each with a range of possible values that must be chosen using time- and data-intensive cross-validation procedures. We propose the wavelet deconvolution (WD) layer as an efficient alternative to this preprocessing step that eliminates a significant number of hyperparameters. The WD layer uses wavelet functions with adjustable scale parameters to learn the spectral decomposition directly from the signal. Using backpropagation, we show that the scale parameters can be optimized with gradient descent. Furthermore, the WD layer adds interpretability to the learned time series classifier by exploiting the properties of the wavelet transform. In our experiments, we show that the WD layer can automatically extract the frequency content used to generate a dataset. The WD layer combined with a CNN applied to the phone recognition task on the TIMIT database achieves a phone error rate of 18.1\\%, a relative improvement of 4\\% over the baseline CNN. Experiments on a dataset where engineered features are not available showed that WD+CNN is the best-performing method. Our results show that the WD layer can improve neural network based time series classifiers both in accuracy and interpretability by learning directly from the input signal.", "bibtex": "@inproceedings{NEURIPS2018_1a3f91fe,\n author = {Khan, Haidar and Yener, Bulent},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning filter widths of spectral decompositions with wavelets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1a3f91fead97497b1a96d6104ad339f6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1a3f91fead97497b1a96d6104ad339f6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1a3f91fead97497b1a96d6104ad339f6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1a3f91fead97497b1a96d6104ad339f6-Reviews.html", "metareview": "", "pdf_size": 631639, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1195090452223114657&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY 12180; Department of Computer Science, Rensselaer Polytechnic Institute, Troy, NY 12180", "aff_domain": "rpi.edu;rpi.edu", "email": "rpi.edu;rpi.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1a3f91fead97497b1a96d6104ad339f6-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Rensselaer Polytechnic Institute", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.rpi.edu", "aff_unique_abbr": "RPI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Troy", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning from Group Comparisons: Exploiting Higher Order Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11488", "id": "11488", "author_site": "Yao Li, Minhao Cheng, Kevin Fujii, Fushing Hsieh, Cho-Jui Hsieh", "author": "Yao Li; Minhao Cheng; Kevin Fujii; Fushing Hsieh; Cho-Jui Hsieh", "abstract": "We study the problem of learning from group comparisons, with applications in predicting outcomes of sports and online games. Most of the previous works in this area focus on learning individual effects---they assume each player has an underlying score, and the ''ability'' of the team is modeled by the sum of team members' scores. Therefore, none of the current approaches can model deeper interactions between team members: some players perform much better if they play together, and some players perform poorly together. In this paper, we propose a new model that takes the player-interaction effects into consideration. However, under certain circumstances, the total number of individuals can be very large, and the number of player interactions grows quadratically, which makes learning intractable. In this case, we propose a latent factor model, and show that the sample complexity of our model is bounded under mild assumptions. Finally, we show that our proposed models have much better prediction power on several E-sports datasets, and furthermore can be used to reveal interesting patterns that cannot be discovered by previous methods.", "bibtex": "@inproceedings{NEURIPS2018_82089746,\n author = {Li, Yao and Cheng, Minhao and Fujii, Kevin and Hsieh, Fushing and Hsieh, Cho-Jui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning from Group Comparisons: Exploiting Higher Order Interactions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8208974663db80265e9bfe7b222dcb18-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8208974663db80265e9bfe7b222dcb18-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8208974663db80265e9bfe7b222dcb18-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8208974663db80265e9bfe7b222dcb18-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8208974663db80265e9bfe7b222dcb18-Reviews.html", "metareview": "", "pdf_size": 525725, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5064938026991387416&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Department of Statistics, University of California, Davis; Department of Computer Science, University of California, Los Angeles; Department of Statistics, University of California, Davis; Department of Statistics, University of California, Davis; Department of Computer Science, University of California, Los Angeles", "aff_domain": "ucdavis.edu;ucla.edu;ucdavis.edu;ucdavis.edu;cs.ucla.edu", "email": "ucdavis.edu;ucla.edu;ucdavis.edu;ucdavis.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8208974663db80265e9bfe7b222dcb18-Abstract.html", "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "University of California, Davis;University of California, Los Angeles", "aff_unique_dep": "Department of Statistics;Department of Computer Science", "aff_unique_url": "https://www.ucdavis.edu;https://www.ucla.edu", "aff_unique_abbr": "UC Davis;UCLA", "aff_campus_unique_index": "0;1;0;0;1", "aff_campus_unique": "Davis;Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning from discriminative feature feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11393", "id": "11393", "author_site": "Sanjoy Dasgupta, Sivan Sabato, Nicholas Roberts, Akansha Dey", "author": "Sanjoy Dasgupta; Akansha Dey; Nicholas Roberts; Sivan Sabato", "abstract": "We consider the problem of learning a multi-class classifier from labels as well as simple explanations that we call \"discriminative features\". We show that such explanations can be provided whenever the target concept is a decision tree, or more generally belongs to a particular subclass of DNF formulas. We present an efficient online algorithm for learning from such feedback and we give tight bounds on the number of mistakes made during the learning process. These bounds depend only on the size of the target concept and not on the overall number of available features, which could be infinite. We also demonstrate the learning procedure experimentally.", "bibtex": "@inproceedings{NEURIPS2018_36ac8e55,\n author = {Dasgupta, Sanjoy and Dey, Akansha and Roberts, Nicholas and Sabato, Sivan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning from discriminative feature feedback},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/36ac8e558ac7690b6f44e2cb5ef93322-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/36ac8e558ac7690b6f44e2cb5ef93322-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/36ac8e558ac7690b6f44e2cb5ef93322-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/36ac8e558ac7690b6f44e2cb5ef93322-Reviews.html", "metareview": "", "pdf_size": 269404, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13108845109719412152&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science and Engineering, University of California, San Diego; Department of Computer Science and Engineering, University of California, San Diego; Department of Computer Science and Engineering, University of California, San Diego; Department of Computer Science, Ben-Gurion University of the Negev", "aff_domain": "eng.ucsd.edu;ucsd.edu;ucsd.edu;cs.bgu.ac.il", "email": "eng.ucsd.edu;ucsd.edu;ucsd.edu;cs.bgu.ac.il", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/36ac8e558ac7690b6f44e2cb5ef93322-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, San Diego;Ben-Gurion University of the Negev", "aff_unique_dep": "Department of Computer Science and Engineering;Department of Computer Science", "aff_unique_url": "https://www.ucsd.edu;https://www.bgu.ac.il", "aff_unique_abbr": "UCSD;BGU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Learning in Games with Lossy Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11502", "id": "11502", "author_site": "Zhengyuan Zhou, Panayotis Mertikopoulos, Susan Athey, Nicholas Bambos, Peter W Glynn, Yinyu Ye", "author": "Zhengyuan Zhou; Panayotis Mertikopoulos; Susan Athey; Nicholas Bambos; Peter W. Glynn; Yinyu Ye", "abstract": "We consider a game-theoretical multi-agent learning problem where the feedback information can be lost during the learning process and rewards are given by a broad class of games known as variationally stable games. We propose a simple variant of the classical online gradient descent algorithm, called reweighted online gradient descent (ROGD), and show that in variationally stable games, if each agent adopts ROGD, then almost sure convergence to the set of Nash equilibria is guaranteed, even when the feedback loss is asynchronous and arbitrarily correlated among agents. We then extend the framework to deal with unknown feedback loss probabilities by using an estimator (constructed from past data) in its place. Finally, we further extend the framework to accommodate both asynchronous loss and stochastic rewards and establish that multi-agent ROGD learning still converges to the set of Nash equilibria in such settings. 
Together, these results contribute to the broad landscape of multi-agent online learning by significantly relaxing the feedback information that is required to achieve desirable outcomes.", "bibtex": "@inproceedings{NEURIPS2018_10c66082,\n author = {Zhou, Zhengyuan and Mertikopoulos, Panayotis and Athey, Susan and Bambos, Nicholas and Glynn, Peter W and Ye, Yinyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning in Games with Lossy Feedback},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/10c66082c124f8afe3df4886f5e516e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/10c66082c124f8afe3df4886f5e516e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/10c66082c124f8afe3df4886f5e516e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/10c66082c124f8afe3df4886f5e516e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/10c66082c124f8afe3df4886f5e516e0-Reviews.html", "metareview": "", "pdf_size": 327176, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12748924558809939020&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Stanford University; Univ. Grenoble Alpes, CNRS, Inria, LIG; Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;imag.fr;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;imag.fr;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/10c66082c124f8afe3df4886f5e516e0-Abstract.html", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Stanford University;Universit\u00e9 Grenoble Alpes", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.univ-grenoble-alpes.fr", "aff_unique_abbr": "Stanford;UGA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;France" }, { "title": "Learning latent variable structured prediction models with Gaussian perturbations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11319", "id": "11319", "author_site": "Kevin Bello, Jean Honorio", "author": "Kevin Bello; Jean Honorio", "abstract": "The standard margin-based structured prediction commonly uses a maximum loss over all possible structured outputs. The large-margin formulation including latent variables not only results in a non-convex formulation but also increases the search space by a factor of the size of the latent space. Recent work has proposed the use of the maximum loss over random structured outputs sampled independently from some proposal distribution, with theoretical guarantees. We extend this work by including latent variables. We study a new family of loss functions under Gaussian perturbations and analyze the effect of the latent space on the generalization bounds. We show that the non-convexity of learning with latent variables originates naturally, as it relates to a tight upper bound of the Gibbs decoder distortion with respect to the latent space. 
Finally, we provide a formulation using random samples and relaxations that produces a tighter upper bound of the Gibbs decoder distortion up to a statistical accuracy, which enables a polynomial time evaluation of the objective function. We illustrate the method with synthetic experiments and a computer vision application.", "bibtex": "@inproceedings{NEURIPS2018_f18a6d1c,\n author = {Bello, Kevin and Honorio, Jean},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning latent variable structured prediction models with Gaussian perturbations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f18a6d1cde4b205199de8729a6637b42-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f18a6d1cde4b205199de8729a6637b42-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f18a6d1cde4b205199de8729a6637b42-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f18a6d1cde4b205199de8729a6637b42-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f18a6d1cde4b205199de8729a6637b42-Reviews.html", "metareview": "", "pdf_size": 1626256, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16864816719245449041&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, Purdue University; Department of Computer Science, Purdue University", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f18a6d1cde4b205199de8729a6637b42-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning long-range spatial dependencies with horizontal gated recurrent units", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11042", "id": "11042", "author_site": "Drew Linsley, Junkyung Kim, Vijay Veerabadran, Charles Windolf, Thomas Serre", "author": "Drew Linsley; Junkyung Kim; Vijay Veerabadran; Charles Windolf; Thomas Serre", "abstract": "Progress in deep learning has spawned great successes in many engineering applications. As a prime example, convolutional neural networks, a type of feedforward neural network, are now approaching -- and sometimes even surpassing -- human accuracy on a variety of visual recognition tasks. Here, however, we show that these neural networks and their recent extensions struggle in recognition tasks where co-dependent visual features must be detected over long spatial ranges. We introduce a visual challenge, Pathfinder, and describe a novel recurrent neural network architecture called the horizontal gated recurrent unit (hGRU) to learn intrinsic horizontal connections -- both within and across feature columns. 
We demonstrate that a single hGRU layer matches or outperforms all tested feedforward hierarchical baselines, including state-of-the-art architectures with orders of magnitude more parameters.", "bibtex": "@inproceedings{NEURIPS2018_ec895663,\n author = {Linsley, Drew and Kim, Junkyung and Veerabadran, Vijay and Windolf, Charles and Serre, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning long-range spatial dependencies with horizontal gated recurrent units},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ec8956637a99787bd197eacd77acce5e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ec8956637a99787bd197eacd77acce5e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ec8956637a99787bd197eacd77acce5e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ec8956637a99787bd197eacd77acce5e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ec8956637a99787bd197eacd77acce5e-Reviews.html", "metareview": "", "pdf_size": 1118631, "gs_citation": 202, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8614534792686682745&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Carney Institute for Brain Science; Carney Institute for Brain Science; Carney Institute for Brain Science; Carney Institute for Brain Science; Carney Institute for Brain Science", "aff_domain": "brown.edu;brown.edu;brown.edu; ;brown.edu", "email": "brown.edu;brown.edu;brown.edu; ;brown.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ec8956637a99787bd197eacd77acce5e-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carney Institute for Brain Science", "aff_unique_dep": "Brain Science", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning semantic similarity in a continuous space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11119", "id": "11119", "author": "Michel Deudon", "abstract": "We address the problem of learning semantic representation of questions to measure similarity between pairs as a continuous distance metric. Our work naturally extends Word Mover\u2019s Distance (WMD) [1] by representing text documents as normal distributions instead of bags of embedded words. Our learned metric measures the dissimilarity between two questions as the minimum amount of distance the intent (hidden representation) of one question needs to "travel" to match the intent of another question. We first learn to repeat and reformulate questions to infer intents as normal distributions with a deep generative model [2] (variational autoencoder). Semantic similarity between pairs is then learned discriminatively as an optimal transport distance metric (Wasserstein 2) with our novel variational siamese framework. Among known models that can read sentences individually, our proposed framework achieves competitive results on the Quora duplicate questions dataset. 
Our work sheds light on how deep generative models can approximate distributions (semantic representations) to effectively measure semantic similarity with meaningful distance metrics from Information Theory.", "bibtex": "@inproceedings{NEURIPS2018_97e8527f,\n author = {Deudon, Michel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning semantic similarity in a continuous space},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/97e8527feaf77a97fc38f34216141515-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/97e8527feaf77a97fc38f34216141515-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/97e8527feaf77a97fc38f34216141515-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/97e8527feaf77a97fc38f34216141515-Reviews.html", "metareview": "", "pdf_size": 671818, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4253751565642833977&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Ecole Polytechnique", "aff_domain": "polytechnique.edu", "email": "polytechnique.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/97e8527feaf77a97fc38f34216141515-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Ecole Polytechnique", "aff_unique_dep": "", "aff_unique_url": "https://www.polytechnique.edu", "aff_unique_abbr": "X", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Learning sparse neural networks via sensitivity-driven regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11386", "id": "11386", "author_site": "Enzo Tartaglione, Skjalg Leps\u00f8y, Attilio Fiandrotti, Gianluca Francini", "author": "Enzo Tartaglione; Skjalg Leps\u00f8y; Attilio Fiandrotti; Gianluca Francini", "abstract": "The ever-increasing number of parameters in deep neural networks poses challenges for memory-limited applications. Regularize-and-prune methods aim at meeting these challenges by sparsifying the network weights. In this context we quantify the output sensitivity to the parameters (i.e. their relevance to the network output) and introduce a regularization term that gradually lowers the absolute value of parameters with low sensitivity. Thus, a very large fraction of the parameters approach zero and are eventually set to zero by simple thresholding. Our method surpasses most of the recent techniques both in terms of sparsity and error rates. In some cases, the method reaches twice the sparsity obtained by other techniques at equal error rates.", "bibtex": "@inproceedings{NEURIPS2018_04df4d43,\n author = {Tartaglione, Enzo and Leps\\o y, Skjalg and Fiandrotti, Attilio and Francini, Gianluca},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning sparse neural networks via sensitivity-driven regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/04df4d434d481c5bb723be1b6df1ee65-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/04df4d434d481c5bb723be1b6df1ee65-Reviews.html", "metareview": "", "pdf_size": 376458, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6831507369534445023&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Politecnico di Torino, Torino, Italy; Nuance Communications, Torino, Italy; Politecnico di Torino, Torino, Italy+T\u00e9l\u00e9com ParisTech, Paris, France; Telecom Italia, Torino, Italy", "aff_domain": "gmail.com; ; ;", "email": "gmail.com; ; ;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/04df4d434d481c5bb723be1b6df1ee65-Abstract.html", "aff_unique_index": "0;1;0+2;3", "aff_unique_norm": "Politecnico di Torino;Nuance Communications;T\u00e9l\u00e9com ParisTech;Telecom Italia", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.polito.it;https://www.nuance.com;https://www.telecom-paristech.fr;https://www.telecomitalia.com", "aff_unique_abbr": "Polito;;TP;Telecom Italia", "aff_campus_unique_index": "0;0;0+1;0", "aff_campus_unique": "Torino;Paris", "aff_country_unique_index": "0;0;0+1;0", "aff_country_unique": "Italy;France" }, { "title": "Learning to Decompose and Disentangle Representations for Video Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11075", "id": "11075", "author_site": "Jun-Ting Hsieh, Bingbin Liu, De-An Huang, Li Fei-Fei, Juan Carlos Niebles", "author": "Jun-Ting Hsieh; Bingbin Liu; De-An Huang; Li F Fei-Fei; Juan Carlos Niebles", "abstract": "Our goal is to predict future video frames given a sequence of input frames. Despite large amounts of video data, this remains a challenging task because of the high-dimensionality of video frames. We address this challenge by proposing the Decompositional Disentangled Predictive Auto-Encoder (DDPAE), a framework that combines structured probabilistic models and deep networks to automatically (i) decompose the high-dimensional video that we aim to predict into components, and (ii) disentangle each component to have low-dimensional temporal dynamics that are easier to predict. Crucially, with an appropriately specified generative model of video frames, our DDPAE is able to learn both the latent decomposition and disentanglement without explicit supervision. For the Moving MNIST dataset, we show that DDPAE is able to recover the underlying components (individual digits) and disentanglement (appearance and location) as we would intuitively do. 
We further demonstrate that DDPAE can be applied to the Bouncing Balls dataset involving complex interactions between multiple objects to predict the video frame directly from the pixels and recover physical states without explicit supervision.", "bibtex": "@inproceedings{NEURIPS2018_496e05e1,\n author = {Hsieh, Jun-Ting and Liu, Bingbin and Huang, De-An and Fei-Fei, Li F and Niebles, Juan Carlos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Decompose and Disentangle Representations for Video Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/496e05e1aea0a9c4655800e8a7b9ea28-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/496e05e1aea0a9c4655800e8a7b9ea28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/496e05e1aea0a9c4655800e8a7b9ea28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/496e05e1aea0a9c4655800e8a7b9ea28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/496e05e1aea0a9c4655800e8a7b9ea28-Reviews.html", "metareview": "", "pdf_size": 1305927, "gs_citation": 380, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3026670262984428356&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Stanford University; Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "email": "stanford.edu;stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/496e05e1aea0a9c4655800e8a7b9ea28-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Exploit Stability for 3D Scene Parsing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11186", "id": "11186", "author_site": "Yilun Du, Zhijian Liu, Hector Basevi, Ales Leonardis, Bill Freeman, Josh Tenenbaum, Jiajun Wu", "author": "Yilun Du; Zhijian Liu; Hector Basevi; Ales Leonardis; Bill Freeman; Josh Tenenbaum; Jiajun Wu", "abstract": "Human scene understanding uses a variety of visual and non-visual cues to perform inference on object types, poses, and relations. Physics is a rich and universal cue which we exploit to enhance scene understanding. We integrate the physical cue of stability into the learning process using a REINFORCE approach coupled to a physics engine, and apply this to the problem of producing the 3D bounding boxes and poses of objects in a scene. We first show that applying physics supervision to an existing scene understanding model increases performance, produces more stable predictions, and allows training to an equivalent performance level with fewer annotated training examples. We then present a novel architecture for 3D scene parsing named Prim R-CNN, learning to predict bounding boxes as well as their 3D size, translation, and rotation. 
With physics supervision, Prim R-CNN outperforms existing scene understanding approaches on this problem. Finally, we show that applying physics supervision on unlabeled real images improves real domain transfer of models trained on synthetic data.", "bibtex": "@inproceedings{NEURIPS2018_43feaeee,\n author = {Du, Yilun and Liu, Zhijian and Basevi, Hector and Leonardis, Ales and Freeman, Bill and Tenenbaum, Josh and Wu, Jiajun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Exploit Stability for 3D Scene Parsing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/43feaeeecd7b2fe2ae2e26d917b6477d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/43feaeeecd7b2fe2ae2e26d917b6477d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/43feaeeecd7b2fe2ae2e26d917b6477d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/43feaeeecd7b2fe2ae2e26d917b6477d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/43feaeeecd7b2fe2ae2e26d917b6477d-Reviews.html", "metareview": "", "pdf_size": 8403115, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12361643549406895907&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "MIT CSAIL; MIT CSAIL; University of Birmingham; University of Birmingham; MIT CSAIL; MIT CSAIL; MIT CSAIL", "aff_domain": "mit.edu;mit.edu;bham.ac.uk;bham.ac.uk;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;bham.ac.uk;bham.ac.uk;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/43feaeeecd7b2fe2ae2e26d917b6477d-Abstract.html", "aff_unique_index": "0;0;1;1;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Birmingham", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;", "aff_unique_url": "https://www.csail.mit.edu;https://www.birmingham.ac.uk", "aff_unique_abbr": "MIT CSAIL;Birmingham", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;1;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Learning to Infer Graphics Programs from Hand-Drawn Images", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11588", "id": "11588", "author_site": "Kevin Ellis, Daniel Ritchie, Armando Solar-Lezama, Josh Tenenbaum", "author": "Kevin Ellis; Daniel Ritchie; Armando Solar-Lezama; Josh Tenenbaum", "abstract": "We introduce a model that learns to convert simple hand drawings\n into graphics programs written in a subset of \\LaTeX.~The model\n combines techniques from deep learning and program synthesis. We\n learn a convolutional neural network that proposes plausible drawing\n primitives that explain an image. These drawing primitives are a\n specification (spec) of what the graphics program needs to draw. We\n learn a model that uses program synthesis techniques to recover a\n graphics program from that spec. These programs have constructs like\n variable bindings, iterative loops, or simple kinds of\n conditionals.
With a graphics program in hand, we can correct errors\n made by the deep network and extrapolate drawings.", "bibtex": "@inproceedings{NEURIPS2018_67880768,\n author = {Ellis, Kevin and Ritchie, Daniel and Solar-Lezama, Armando and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Infer Graphics Programs from Hand-Drawn Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6788076842014c83cedadbe6b0ba0314-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6788076842014c83cedadbe6b0ba0314-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6788076842014c83cedadbe6b0ba0314-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6788076842014c83cedadbe6b0ba0314-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6788076842014c83cedadbe6b0ba0314-Reviews.html", "metareview": "", "pdf_size": 1525852, "gs_citation": 281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14065112485794121024&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "MIT; Brown University; MIT; MIT", "aff_domain": "mit.edu;brown.edu;csail.mit.edu;mit.edu", "email": "mit.edu;brown.edu;csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6788076842014c83cedadbe6b0ba0314-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.brown.edu", "aff_unique_abbr": "MIT;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to Multitask", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11562", "id": "11562", "author_site": "Yu Zhang, Ying Wei, Qiang Yang", "author": "Yu Zhang; Ying Wei; Qiang Yang", "abstract": "Multitask learning has shown promising performance in many applications, and many multitask models have been proposed. In order to identify an effective multitask model for a given multitask problem, we propose a learning framework called Learning to MultiTask (L2MT). To achieve the goal, L2MT exploits historical multitask experience, which is organized as a training set consisting of several tuples, each of which contains a multitask problem with multiple tasks, a multitask model, and the relative test error. Based on such a training set, L2MT first uses a proposed layerwise graph neural network to learn task embeddings for all the tasks in a multitask problem and then learns an estimation function to estimate the relative test error based on task embeddings and the representation of the multitask model based on a unified formulation. Given a new multitask problem, the estimation function is used to identify a suitable multitask model. Experiments on benchmark datasets show the effectiveness of the proposed L2MT framework.", "bibtex": "@inproceedings{NEURIPS2018_aeefb050,\n author = {Zhang, Yu and Wei, Ying and Yang, Qiang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N.
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Multitask},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aeefb050911334869a7a5d9e4d0e1689-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aeefb050911334869a7a5d9e4d0e1689-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aeefb050911334869a7a5d9e4d0e1689-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aeefb050911334869a7a5d9e4d0e1689-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aeefb050911334869a7a5d9e4d0e1689-Reviews.html", "metareview": "", "pdf_size": 935856, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1340098634268505234&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "HKUST; Tencent AI Lab; HKUST", "aff_domain": "gmail.com;tencent.com;cse.ust.hk", "email": "gmail.com;tencent.com;cse.ust.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aeefb050911334869a7a5d9e4d0e1689-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Learning to Navigate in Cities Without a Map", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11251", "id": "11251", "author_site": "Piotr Mirowski, Matt Grimes, Mateusz Malinowski, Karl Moritz Hermann, Keith Anderson, Denis Teplyashin, Karen Simonyan, koray kavukcuoglu, Andrew Zisserman, Raia Hadsell", "author": "Piotr Mirowski; Matt Grimes; Mateusz Malinowski; Karl Moritz Hermann; Keith Anderson; Denis Teplyashin; Karen Simonyan; koray kavukcuoglu; Andrew Zisserman; Raia Hadsell", "abstract": "Navigating through unstructured environments is a basic capability of intelligent creatures, and thus is of fundamental interest in the study and development of artificial intelligence. Long-range navigation is a complex cognitive task that relies on developing an internal representation of space, grounded by recognisable landmarks and robust visual processing, that can simultaneously support continuous self-localisation (\"I am here\") and a representation of the goal (\"I am going there\"). Building upon recent research that applies deep reinforcement learning to maze navigation problems, we present an end-to-end deep reinforcement learning approach that can be applied on a city scale. Recognising that successful navigation relies on integration of general policies with locale-specific knowledge, we propose a dual pathway architecture that allows locale-specific features to be encapsulated, while still enabling transfer to multiple cities. A key contribution of this paper is an interactive navigation environment that uses Google Street View for its photographic content and worldwide coverage. Our baselines demonstrate that deep reinforcement learning agents can learn to navigate in multiple cities and to traverse to target destinations that may be kilometres away. 
A video summarizing our research and showing the trained agent in diverse city environments as well as on the transfer task is available at: https://sites.google.com/view/learn-navigate-cities-nips18", "bibtex": "@inproceedings{NEURIPS2018_e034fb6b,\n author = {Mirowski, Piotr and Grimes, Matt and Malinowski, Mateusz and Hermann, Karl Moritz and Anderson, Keith and Teplyashin, Denis and Simonyan, Karen and kavukcuoglu, koray and Zisserman, Andrew and Hadsell, Raia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Navigate in Cities Without a Map},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e034fb6b66aacc1d48f445ddfb08da98-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e034fb6b66aacc1d48f445ddfb08da98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e034fb6b66aacc1d48f445ddfb08da98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e034fb6b66aacc1d48f445ddfb08da98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e034fb6b66aacc1d48f445ddfb08da98-Reviews.html", "metareview": "", "pdf_size": 4419421, "gs_citation": 363, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9758707731169438744&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "https://github.com/deepmind/streetlearn", "project": "http://streetlearn.cc", "author_num": 10, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e034fb6b66aacc1d48f445ddfb08da98-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning to Optimize Tensor Programs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11341", "id": "11341", "author_site": "Tianqi Chen, Lianmin Zheng, Eddie Yan, Ziheng Jiang, Thierry Moreau, Luis Ceze, Carlos Guestrin, Arvind Krishnamurthy", "author": "Tianqi Chen; Lianmin Zheng; Eddie Yan; Ziheng Jiang; Thierry Moreau; Luis Ceze; Carlos Guestrin; Arvind Krishnamurthy", "abstract": "We introduce a learning-based framework to optimize tensor programs for deep learning workloads. Efficient implementations of tensor operators, such as matrix multiplication and high dimensional convolution are key enablers of effective deep learning systems. However, existing systems rely on manually optimized libraries such as cuDNN where only a narrow range of server class GPUs are well-supported. The reliance on hardware specific operator libraries limits the applicability of high-level graph optimizations and incurs significant engineering costs when deploying to new hardware targets. We use learning to remove this engineering burden. 
We learn domain specific statistical cost models to guide the search of tensor operator implementations over billions of possible program variants. We further accelerate the search by effective model transfer across workloads. Experimental results show that our framework delivers performance competitive with state-of-the-art hand-tuned libraries for low-power CPU, mobile GPU, and server-class GPU.", "bibtex": "@inproceedings{NEURIPS2018_8b570001,\n author = {Chen, Tianqi and Zheng, Lianmin and Yan, Eddie and Jiang, Ziheng and Moreau, Thierry and Ceze, Luis and Guestrin, Carlos and Krishnamurthy, Arvind},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Optimize Tensor Programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8b5700012be65c9da25f49408d959ca0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8b5700012be65c9da25f49408d959ca0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8b5700012be65c9da25f49408d959ca0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8b5700012be65c9da25f49408d959ca0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8b5700012be65c9da25f49408d959ca0-Reviews.html", "metareview": "", "pdf_size": 2863796, "gs_citation": 527, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13341170825950118952&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Paul G. Allen School of Computer Science & Engineering, University of Washington; Shanghai Jiao Tong University; Paul G. Allen School of Computer Science & Engineering, University of Washington; Paul G. Allen School of Computer Science & Engineering, University of Washington; Paul G. Allen School of Computer Science & Engineering, University of Washington; Paul G. Allen School of Computer Science & Engineering, University of Washington; Paul G. Allen School of Computer Science & Engineering, University of Washington; Paul G. Allen School of Computer Science & Engineering, University of Washington", "aff_domain": "; ; ; ; ; ; ; ", "email": "; ; ; ; ; ; ; ", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8b5700012be65c9da25f49408d959ca0-Abstract.html", "aff_unique_index": "0;1;0;0;0;0;0;0", "aff_unique_norm": "University of Washington;Shanghai Jiao Tong University", "aff_unique_dep": "Paul G. Allen School of Computer Science & Engineering;", "aff_unique_url": "https://www.washington.edu;https://www.sjtu.edu.cn", "aff_unique_abbr": "UW;SJTU", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;1;0;0;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning to Play With Intrinsically-Motivated, Self-Aware Agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11802", "id": "11802", "author_site": "Nick Haber, Damian Mrowca, Stephanie Wang, Li Fei-Fei, Daniel Yamins", "author": "Nick Haber; Damian Mrowca; Stephanie Wang; Li F Fei-Fei; Daniel L Yamins", "abstract": "Infants are experts at playing, with an amazing ability to generate novel structured behaviors in unstructured environments that lack clear extrinsic reward signals. 
We seek to mathematically formalize these abilities using a neural network that implements curiosity-driven intrinsic motivation. Using a simple but ecologically naturalistic simulated environment in which an agent can move and interact with objects it sees, we propose a \"world-model\" network that learns to predict the dynamic consequences of the agent's actions. Simultaneously, we train a separate explicit \"self-model\" that allows the agent to track the error map of its world-model. It then uses the self-model to adversarially challenge the developing world-model. We demonstrate that this policy causes the agent to explore novel and informative interactions with its environment, leading to the generation of a spectrum of complex behaviors, including ego-motion prediction, object attention, and object gathering. Moreover, the world-model that the agent learns supports improved performance on object dynamics prediction, detection, localization and recognition tasks. Taken together, our results are initial steps toward creating flexible autonomous agents that self-supervise in realistic physical environments.", "bibtex": "@inproceedings{NEURIPS2018_71e63ef5,\n author = {Haber, Nick and Mrowca, Damian and Wang, Stephanie and Fei-Fei, Li F and Yamins, Daniel L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Play With Intrinsically-Motivated, Self-Aware Agents},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/71e63ef5b7249cfc60852f0e0f5bf4c8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/71e63ef5b7249cfc60852f0e0f5bf4c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/71e63ef5b7249cfc60852f0e0f5bf4c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/71e63ef5b7249cfc60852f0e0f5bf4c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/71e63ef5b7249cfc60852f0e0f5bf4c8-Reviews.html", "metareview": "", "pdf_size": 7962463, "gs_citation": 153, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4463017428516877261&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff": "Departments of Psychology + Pediatrics + Biomedical Data Science + Computer Science + Wu Tsai Neurosciences Institute; Departments of Psychology + Pediatrics + Biomedical Data Science + Computer Science + Wu Tsai Neurosciences Institute; Departments of Psychology + Pediatrics + Biomedical Data Science + Computer Science + Wu Tsai Neurosciences Institute; Departments of Psychology + Pediatrics + Biomedical Data Science + Computer Science + Wu Tsai Neurosciences Institute; Departments of Psychology + Pediatrics + Biomedical Data Science + Computer Science + Wu Tsai Neurosciences Institute", "aff_domain": "stanford.edu;stanford.edu; ; ; ", "email": "stanford.edu;stanford.edu; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/71e63ef5b7249cfc60852f0e0f5bf4c8-Abstract.html", "aff_unique_index": "0+2+3;0+2+3;0+2+3;0+2+3;0+2+3", "aff_unique_norm": "Departments of Psychology;;Biomedical Data Science;Wu Tsai Neurosciences Institute", "aff_unique_dep": "Psychology;Pediatrics;Biomedical Data Science;Neurosciences", "aff_unique_url": ";;;", "aff_unique_abbr": ";;;",
"aff_campus_unique_index": ";;;;", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Learning to Reason with Third Order Tensor Products", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11947", "id": "11947", "author_site": "Imanol Schlag, J\u00fcrgen Schmidhuber", "author": "Imanol Schlag; J\u00fcrgen Schmidhuber", "abstract": "We combine Recurrent Neural Networks with Tensor Product Representations to\nlearn combinatorial representations of sequential data. This improves symbolic\ninterpretation and systematic generalisation. Our architecture is trained end-to-end\nthrough gradient descent on a variety of simple natural language reasoning tasks,\nsignificantly outperforming the latest state-of-the-art models in single-task and\nall-tasks settings. We also augment a subset of the data such that training and test\ndata exhibit large systematic differences and show that our approach generalises\nbetter than the previous state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_a274315e,\n author = {Schlag, Imanol and Schmidhuber, J\\\"{u}rgen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Reason with Third Order Tensor Products},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a274315e1abede44d63005826249d1df-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a274315e1abede44d63005826249d1df-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a274315e1abede44d63005826249d1df-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a274315e1abede44d63005826249d1df-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a274315e1abede44d63005826249d1df-Reviews.html", "metareview": "", "pdf_size": 1472833, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1859815740065749231&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The Swiss AI Lab IDSIA / USI / SUPSI; The Swiss AI Lab IDSIA / USI / SUPSI", "aff_domain": "idsia.ch;idsia.ch", "email": "idsia.ch;idsia.ch", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a274315e1abede44d63005826249d1df-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Swiss AI Lab IDSIA", "aff_unique_dep": "AI Lab", "aff_unique_url": "https://www.idsia.ch/", "aff_unique_abbr": "IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Learning to Reconstruct Shapes from Unseen Classes", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11236", "id": "11236", "author_site": "Xiuming Zhang, Zhoutong Zhang, Chengkai Zhang, Josh Tenenbaum, Bill Freeman, Jiajun Wu", "author": "Xiuming Zhang; Zhoutong Zhang; Chengkai Zhang; Josh Tenenbaum; Bill Freeman; Jiajun Wu", "abstract": "From a single image, humans are able to perceive the full 3D shape of an object by exploiting learned shape priors from everyday life. Contemporary single-image 3D reconstruction algorithms aim to solve this task in a similar fashion, but often end up with priors that are highly biased by training classes. 
Here we present an algorithm, Generalizable Reconstruction (GenRe), designed to capture more generic, class-agnostic shape priors. We achieve this with an inference network and training procedure that combine 2.5D representations of visible surfaces (depth and silhouette), spherical shape representations of both visible and non-visible surfaces, and 3D voxel-based representations, in a principled manner that exploits the causal structure of how 3D shapes give rise to 2D images. Experiments demonstrate that GenRe performs well on single-view shape reconstruction, and generalizes to diverse novel objects from categories not seen during training.", "bibtex": "@inproceedings{NEURIPS2018_208e43f0,\n author = {Zhang, Xiuming and Zhang, Zhoutong and Zhang, Chengkai and Tenenbaum, Josh and Freeman, Bill and Wu, Jiajun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Reconstruct Shapes from Unseen Classes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/208e43f0e45c4c78cafadb83d2888cb6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/208e43f0e45c4c78cafadb83d2888cb6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/208e43f0e45c4c78cafadb83d2888cb6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/208e43f0e45c4c78cafadb83d2888cb6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/208e43f0e45c4c78cafadb83d2888cb6-Reviews.html", "metareview": "", "pdf_size": 7557654, "gs_citation": 184, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7814718909409744472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/208e43f0e45c4c78cafadb83d2888cb6-Abstract.html" }, { "title": "Learning to Repair Software Vulnerabilities with Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11761", "id": "11761", "author_site": "Jacob Harer, Onur Ozdemir, Tomo Lazovich, Christopher Reale, Rebecca Russell, Louis Kim, Peter Chin", "author": "Jacob Harer; Onur Ozdemir; Tomo Lazovich; Christopher Reale; Rebecca Russell; Louis Kim; peter chin", "abstract": "Motivated by the problem of automated repair of software vulnerabilities, we propose an adversarial learning approach that maps from one discrete source domain to another target domain without requiring paired labeled examples or source and target domains to be bijections. We demonstrate that the proposed adversarial learning approach is an effective technique for repairing software vulnerabilities, performing close to seq2seq approaches that require labeled pairs. The proposed Generative Adversarial Network approach is application-agnostic in that it can be applied to other problems similar to code repair, such as grammar correction or sentiment translation.", "bibtex": "@inproceedings{NEURIPS2018_68abef8e,\n author = {Harer, Jacob and Ozdemir, Onur and Lazovich, Tomo and Reale, Christopher and Russell, Rebecca and Kim, Louis and chin, peter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Repair Software Vulnerabilities with Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/68abef8ee1ac9b664a90b0bbaff4f770-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/68abef8ee1ac9b664a90b0bbaff4f770-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/68abef8ee1ac9b664a90b0bbaff4f770-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/68abef8ee1ac9b664a90b0bbaff4f770-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/68abef8ee1ac9b664a90b0bbaff4f770-Reviews.html", "metareview": "", "pdf_size": 438321, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4658509880494078365&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/68abef8ee1ac9b664a90b0bbaff4f770-Abstract.html" }, { "title": "Learning to Share and Hide Intentions using Information Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11970", "id": "11970", "author_site": "DJ Strouse, Max Kleiman-Weiner, Josh Tenenbaum, Matt Botvinick, David Schwab", "author": "DJ Strouse; Max Kleiman-Weiner; Josh Tenenbaum; Matt Botvinick; David J Schwab", "abstract": "Learning to cooperate with friends and compete with foes is a key component of multi-agent reinforcement learning. Typically to do so, one requires access to either a model of or interaction with the other agent(s). Here we show how to learn effective strategies for cooperation and competition in an asymmetric information game with no such model or interaction. Our approach is to encourage an agent to reveal or hide their intentions using an information-theoretic regularizer. We consider both the mutual information between goal and action given state, as well as the mutual information between goal and state. We show how to stochastically optimize these regularizers in a way that is easy to integrate with policy gradient reinforcement learning. Finally, we demonstrate that cooperative (competitive) policies learned with our approach lead to more (less) reward for a second agent in two simple asymmetric information games.", "bibtex": "@inproceedings{NEURIPS2018_1ef03ed0,\n author = {Strouse, DJ and Kleiman-Weiner, Max and Tenenbaum, Josh and Botvinick, Matt and Schwab, David J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Share and Hide Intentions using Information Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1ef03ed0cd5863c550128836b28ec3e9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1ef03ed0cd5863c550128836b28ec3e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1ef03ed0cd5863c550128836b28ec3e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1ef03ed0cd5863c550128836b28ec3e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1ef03ed0cd5863c550128836b28ec3e9-Reviews.html", "metareview": "", "pdf_size": 4818879, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17666377994780351102&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Princeton University; MIT; MIT; DeepMind+UCL; CUNY Graduate Center", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1ef03ed0cd5863c550128836b28ec3e9-Abstract.html", "aff_unique_index": "0;1;1;2+3;4", "aff_unique_norm": "Princeton University;Massachusetts Institute of Technology;DeepMind;University College London;City University of New York", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.princeton.edu;https://web.mit.edu;https://deepmind.com;https://www.ucl.ac.uk;https://www.gc.cuny.edu", "aff_unique_abbr": "Princeton;MIT;DeepMind;UCL;CUNY GC", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Graduate Center", "aff_country_unique_index": "0;0;0;1+1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Learning to Solve SMT Formulas", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11976", "id": "11976", "author_site": "Mislav Balunovic, Pavol Bielik, Martin Vechev", "author": "Mislav Balunovic; Pavol Bielik; Martin Vechev", "abstract": "We present a new approach for learning to solve SMT formulas. We phrase the challenge of solving SMT formulas as a tree search problem where at each step a transformation is applied to the input formula until the formula is solved. Our approach works in two phases: first, given a dataset of unsolved formulas we learn a policy that for each formula selects a suitable transformation to apply at each step in order to solve the formula, and second, we synthesize a strategy in the form of a loop-free program with branches. This strategy is an interpretable representation of the policy decisions and is used to guide the SMT solver to decide formulas more efficiently, without requiring any modification to the solver itself and without needing to evaluate the learned policy at inference time. We show that our approach is effective in practice - it solves 17% more formulas over a range of benchmarks and achieves up to 100x runtime improvement over a state-of-the-art SMT solver.", "bibtex": "@inproceedings{NEURIPS2018_68331ff0,\n author = {Balunovic, Mislav and Bielik, Pavol and Vechev, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Solve SMT Formulas},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/68331ff0427b551b68e911eebe35233b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/68331ff0427b551b68e911eebe35233b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/68331ff0427b551b68e911eebe35233b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/68331ff0427b551b68e911eebe35233b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/68331ff0427b551b68e911eebe35233b-Reviews.html", "metareview": "", "pdf_size": 484830, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12444848553746905691&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, ETH Z\u00fcrich, Switzerland; Department of Computer Science, ETH Z\u00fcrich, Switzerland; Department of Computer Science, ETH Z\u00fcrich, Switzerland", "aff_domain": "ethz.ch;inf.ethz.ch;inf.ethz.ch", "email": "ethz.ch;inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/68331ff0427b551b68e911eebe35233b-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Learning to Specialize with Knowledge Distillation for Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11774", "id": "11774", "author_site": "Jonghwan Mun, Kimin Lee, Jinwoo Shin, Bohyung Han", "author": "Jonghwan Mun; Kimin Lee; Jinwoo Shin; Bohyung Han", "abstract": "Visual Question Answering (VQA) is a notoriously challenging problem because it involves various heterogeneous tasks defined by questions within a unified framework. Learning specialized models for individual types of tasks is intuitively attractive but surprisingly difficult; it is not straightforward to outperform a naive independent ensemble approach. We present a principled algorithm to learn specialized models with knowledge distillation under a multiple choice learning (MCL) framework, where training examples are assigned dynamically to a subset of models for updating network parameters. The assigned and non-assigned models are learned to predict ground-truth answers and imitate their own base models before specialization, respectively. Our approach alleviates the limitation of data deficiency in existing MCL frameworks, and allows each model to learn its own specialized expertise without forgetting general knowledge. The proposed framework is model-agnostic and applicable to any tasks other than VQA, e.g., image classification with a large number of labels but few per-class examples, which is known to be difficult under existing MCL schemes. Our experimental results indeed demonstrate that our method outperforms other baselines for VQA and image classification.", "bibtex": "@inproceedings{NEURIPS2018_0f281810,\n author = {Mun, Jonghwan and Lee, Kimin and Shin, Jinwoo and Han, Bohyung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N.
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Specialize with Knowledge Distillation for Visual Question Answering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0f2818101a7ac4b96ceeba38de4b934c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0f2818101a7ac4b96ceeba38de4b934c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0f2818101a7ac4b96ceeba38de4b934c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0f2818101a7ac4b96ceeba38de4b934c-Reviews.html", "metareview": "", "pdf_size": 715342, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13278438189351519907&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Computer Vision Lab., POSTECH, Pohang, Korea; Algorithmic Intelligence Lab., KAIST, Daejeon, Korea; Algorithmic Intelligence Lab., KAIST, Daejeon, Korea; Computer Vision Lab., ASRI, Seoul National University, Seoul, Korea", "aff_domain": "postech.ac.kr;kaist.ac.kr;kaist.ac.kr;snu.ac.kr", "email": "postech.ac.kr;kaist.ac.kr;kaist.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0f2818101a7ac4b96ceeba38de4b934c-Abstract.html", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "POSTECH;KAIST;Seoul National University", "aff_unique_dep": "Computer Vision Lab.;Algorithmic Intelligence Lab.;Computer Vision Lab.", "aff_unique_url": "https://www.postech.ac.kr;https://www.kaist.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "POSTECH;KAIST;SNU", "aff_campus_unique_index": "0;1;1;2", "aff_campus_unique": "Pohang;Daejeon;Seoul", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Learning to Teach with Dynamic Loss Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11625", "id": "11625", "author_site": "Lijun Wu, Fei Tian, Yingce Xia, Yang Fan, Tao Qin, Lai Jian-Huang, Tie-Yan Liu", "author": "Lijun Wu; Fei Tian; Yingce Xia; Yang Fan; Tao Qin; Lai Jian-Huang; Tie-Yan Liu", "abstract": "Teaching is critical to human society: it is with teaching that prospective students are educated and human civilization can be inherited and advanced. A good teacher not only provides his/her students with qualified teaching materials (e.g., textbooks), but also sets up appropriate learning objectives (e.g., course projects and exams) considering different situations of a student. When it comes to artificial intelligence, treating machine learning models as students, the loss functions that are optimized act as perfect counterparts of the learning objective set by the teacher. In this work, we explore the possibility of imitating human teaching behaviors by dynamically and automatically outputting appropriate loss functions to train machine learning models. Different from typical learning settings in which the loss function of a machine learning model is predefined and fixed, in our framework, the loss function of a machine learning model (we call it student) is defined by another machine learning model (we call it teacher). The ultimate goal of the teacher model is cultivating the student to have better performance measured on a development dataset.
Towards that end, similar to human teaching, the teacher, a parametric model, dynamically outputs different loss functions that will be used and optimized by its student model at different training stages. We develop an efficient learning method for the teacher model that makes gradient-based optimization possible, avoiding ineffective solutions such as policy optimization. We name our method ``learning to teach with dynamic loss functions'' (L2T-DLF for short). Extensive experiments on real-world tasks including image classification and neural machine translation demonstrate that our method significantly improves the quality of various student models.", "bibtex": "@inproceedings{NEURIPS2018_8051a3c4,\n author = {Wu, Lijun and Tian, Fei and Xia, Yingce and Fan, Yang and Qin, Tao and Jian-Huang, Lai and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Teach with Dynamic Loss Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8051a3c40561002834e59d566b7430cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8051a3c40561002834e59d566b7430cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8051a3c40561002834e59d566b7430cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8051a3c40561002834e59d566b7430cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8051a3c40561002834e59d566b7430cf-Reviews.html", "metareview": "", "pdf_size": 887892, "gs_citation": 149, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16109199048371196819&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Sun Yat-sen University; Microsoft Research + Sun Yat-sen University; Microsoft Research; University of Science and Technology of China + Microsoft Research; Microsoft Research; Sun Yat-sen University; Microsoft Research", "aff_domain": "mail2.sysu.edu.cn;microsoft.com;microsoft.com;mail.ustc.edu.cn;microsoft.com;mail.sysu.edu.cn;microsoft.com", "email": "mail2.sysu.edu.cn;microsoft.com;microsoft.com;mail.ustc.edu.cn;microsoft.com;mail.sysu.edu.cn;microsoft.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8051a3c40561002834e59d566b7430cf-Abstract.html", "aff_unique_index": "0;1+0;1;2+1;1;0;1", "aff_unique_norm": "Sun Yat-sen University;Microsoft;University of Science and Technology of China", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.microsoft.com/en-us/research;http://www.ustc.edu.cn", "aff_unique_abbr": "SYSU;MSR;USTC", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0;1;0+1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Learning towards Minimum Hyperspherical Energy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11603", "id": "11603", "author_site": "Weiyang Liu, Rongmei Lin, Zhen Liu, Lixin Liu, Zhiding Yu, Bo Dai, Le Song", "author": "Weiyang Liu; Rongmei Lin; Zhen Liu; Lixin Liu; Zhiding Yu; Bo Dai; Le Song", "abstract": "Neural networks are a powerful class of nonlinear functions that can be trained end-to-end on various applications.
While the over-parametrized nature of many neural networks provides the ability to fit complex functions and the strong representation power to handle challenging tasks, it also leads to highly correlated neurons that can hurt the generalization ability and incur unnecessary computation cost. As a result, how to regularize the network to avoid undesired representation redundancy becomes an important issue. To this end, we draw inspiration from a well-known problem in physics -- the Thomson problem, where one seeks to find a state that distributes N electrons on a unit sphere as evenly as possible with minimum potential energy. In light of this intuition, we reduce the redundancy regularization problem to generic energy minimization, and propose a minimum hyperspherical energy (MHE) objective as generic regularization for neural networks. We also propose a few novel variants of MHE, and provide some insights from a theoretical point of view. Finally, we apply neural networks with MHE regularization to several challenging tasks. Extensive experiments demonstrate the effectiveness of our intuition, showing superior performance with MHE regularization.", "bibtex": "@inproceedings{NEURIPS2018_177540c7,\n author = {Liu, Weiyang and Lin, Rongmei and Liu, Zhen and Liu, Lixin and Yu, Zhiding and Dai, Bo and Song, Le},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning towards Minimum Hyperspherical Energy},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/177540c7bcb8db31697b601642eac8d4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/177540c7bcb8db31697b601642eac8d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/177540c7bcb8db31697b601642eac8d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/177540c7bcb8db31697b601642eac8d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/177540c7bcb8db31697b601642eac8d4-Reviews.html", "metareview": "", "pdf_size": 3719271, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8926876900214628419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Georgia Institute of Technology; Emory University; Georgia Institute of Technology; South China University of Technology; NVIDIA; Georgia Institute of Technology+Google Brain; Georgia Institute of Technology+Ant Financial", "aff_domain": "gatech.edu; ; ; ; ; ; ", "email": "gatech.edu; ; ; ; ; ; ", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/177540c7bcb8db31697b601642eac8d4-Abstract.html", "aff_unique_index": "0;1;0;2;3;0+4;0+5", "aff_unique_norm": "Georgia Institute of Technology;Emory University;South China University of Technology;NVIDIA;Google;Ant Financial", "aff_unique_dep": ";;;NVIDIA Corporation;Google Brain;", "aff_unique_url": "https://www.gatech.edu;https://www.emory.edu;https://www.scut.edu.cn;https://www.nvidia.com;https://brain.google.com;https://www.antgroup.com", "aff_unique_abbr": "Georgia Tech;Emory;SCUT;NVIDIA;Google Brain;Ant Financial", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0+0;0+1", "aff_country_unique": "United States;China" }, { "title": "Learning with SGD and
Random Features", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11966", "id": "11966", "author_site": "Luigi Carratino, Alessandro Rudi, Lorenzo Rosasco", "author": "Luigi Carratino; Alessandro Rudi; Lorenzo Rosasco", "abstract": "Sketching and stochastic gradient methods are arguably the most common techniques to derive efficient large scale learning algorithms. In this paper, we investigate their application in the context of nonparametric statistical learning. More precisely, we study the estimator defined by stochastic gradient with mini batches and random features. The latter can be seen as form of nonlinear sketching and used to define approximate kernel methods. The considered estimator is not explicitly penalized/constrained and regularization is implicit. Indeed, our study highlights how different parameters, such as number of features, iterations, step-size and mini-batch size control the learning properties of the solutions. We do this by deriving optimal finite sample bounds, under standard assumptions. The obtained results are corroborated and illustrated by numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_741a0099,\n author = {Carratino, Luigi and Rudi, Alessandro and Rosasco, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning with SGD and Random Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/741a0099c9ac04c7bfc822caf7c7459f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/741a0099c9ac04c7bfc822caf7c7459f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/741a0099c9ac04c7bfc822caf7c7459f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/741a0099c9ac04c7bfc822caf7c7459f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/741a0099c9ac04c7bfc822caf7c7459f-Reviews.html", "metareview": "", "pdf_size": 768159, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15214283539921411584&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "University of Genoa, Genoa, Italy; INRIA \u2013 Sierra Project-team, \u00c9cole Normale Sup\u00e9rieure, Paris; University of Genoa, LCSL \u2013 IIT & MIT", "aff_domain": "dibris.unige.it; ; ", "email": "dibris.unige.it; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/741a0099c9ac04c7bfc822caf7c7459f-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Genoa;\u00c9cole Normale Sup\u00e9rieure", "aff_unique_dep": ";INRIA \u2013 Sierra Project-team", "aff_unique_url": "https://www.unige.it;https://www.ens.fr", "aff_unique_abbr": "UniGe;ENS", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Genoa;Paris;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Italy;France" }, { "title": "Learning without the Phase: Regularized PhaseMax Achieves Optimal Sample Complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11826", "id": "11826", "author_site": "Fariborz Salehi, Ehsan Abbasi, Babak Hassibi", "author": "Fariborz Salehi; Ehsan Abbasi; Babak Hassibi", "abstract": "The problem of estimating an unknown signal, $\\mathbf x_0\\in \\mathbb R^n$, from a 
vector $\\mathbf y\\in \\mathbb R^m$ consisting of $m$ magnitude-only measurements of the form $y_i=|\\mathbf a_i^T\\mathbf x_0|$, where the $\\mathbf a_i$'s are the rows of a known measurement matrix $\\mathbf A$, is a classical problem known as phase retrieval. This problem arises when measuring the phase is costly or altogether infeasible. In many applications in machine learning, signal processing, statistics, etc., the underlying signal has certain structure (sparse, low-rank, finite alphabet, etc.), opening up the possibility of recovering $\\mathbf x_0$ from a number of measurements smaller than the ambient dimension, i.e., $m<n$.", "bibtex": "@inproceedings{NEURIPS2018_b91f4f4d,\n author = {Salehi, Fariborz and Abbasi, Ehsan and Hassibi, Babak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning without the Phase: Regularized PhaseMax Achieves Optimal Sample Complexity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b91f4f4d36fa98a94ac5584af95594a0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b91f4f4d36fa98a94ac5584af95594a0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b91f4f4d36fa98a94ac5584af95594a0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b91f4f4d36fa98a94ac5584af95594a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b91f4f4d36fa98a94ac5584af95594a0-Reviews.html", "metareview": "", "pdf_size": 588762, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18356015506780438708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Electrical Engineering, Caltech; Department of Electrical Engineering, Caltech; Department of Electrical Engineering, Caltech", "aff_domain": "caltech.edu;caltech.edu;caltech.edu", "email": "caltech.edu;caltech.edu;caltech.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b91f4f4d36fa98a94ac5584af95594a0-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Legendre Decomposition for Tensors", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11841", "id": "11841", "author_site": "Mahito Sugiyama, Hiroyuki Nakahara, Koji Tsuda", "author": "Mahito Sugiyama; Hiroyuki Nakahara; Koji Tsuda", "abstract": "We present a novel nonnegative tensor decomposition method, called Legendre decomposition, which factorizes an input tensor into a multiplicative combination of parameters. Thanks to the well-developed theory of information geometry, the reconstructed tensor is unique and always minimizes the KL divergence from an input tensor.
We empirically show that Legendre decomposition can more accurately reconstruct tensors than other nonnegative tensor decomposition methods.", "bibtex": "@inproceedings{NEURIPS2018_56a3107c,\n author = {Sugiyama, Mahito and Nakahara, Hiroyuki and Tsuda, Koji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Legendre Decomposition for Tensors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/56a3107cad6611c8337ee36d178ca129-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/56a3107cad6611c8337ee36d178ca129-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/56a3107cad6611c8337ee36d178ca129-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/56a3107cad6611c8337ee36d178ca129-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/56a3107cad6611c8337ee36d178ca129-Reviews.html", "metareview": "", "pdf_size": 233080, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12973396671492815941&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "National Institute of Informatics + JST, PRESTO; RIKEN Center for Brain Science; The University of Tokyo + NIMS + RIKEN AIP", "aff_domain": "nii.ac.jp;brain.riken.jp;k.u-tokyo.ac.jp", "email": "nii.ac.jp;brain.riken.jp;k.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/56a3107cad6611c8337ee36d178ca129-Abstract.html", "aff_unique_index": "0+1;2;3+4+2", "aff_unique_norm": "National Institute of Informatics;Japan Science and Technology Agency;RIKEN;University of Tokyo;National Institute for Materials Science", "aff_unique_dep": ";;Center for Brain Science;;", "aff_unique_url": "https://www.nii.ac.jp/;https://www.jst.go.jp;https://www.riken.jp/en/;https://www.u-tokyo.ac.jp;https://www.nims.go.jp", "aff_unique_abbr": "NII;JST;RIKEN;UTokyo;NIMS", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0+0+0", "aff_country_unique": "Japan" }, { "title": "Leveraged volume sampling for linear regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11259", "id": "11259", "author_site": "Michal Derezinski, Manfred K. Warmuth, Daniel Hsu", "author": "Michal Derezinski; Manfred K. Warmuth; Daniel J. Hsu", "abstract": "Suppose an n x d design matrix in a linear regression problem is given, \nbut the response for each point is hidden unless explicitly requested. \nThe goal is to sample only a small number k << n of the responses, \nand then produce a weight vector whose sum of squares loss over", "bibtex": "@inproceedings{NEURIPS2018_2ba8698b,\n author = {Derezinski, Michal and Warmuth, Manfred K. K and Hsu, Daniel J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Leveraged volume sampling for linear regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2ba8698b79439589fdd2b0f7218d8b07-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2ba8698b79439589fdd2b0f7218d8b07-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2ba8698b79439589fdd2b0f7218d8b07-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2ba8698b79439589fdd2b0f7218d8b07-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2ba8698b79439589fdd2b0f7218d8b07-Reviews.html", "metareview": "", "pdf_size": 654996, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4780659693584343535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, University of California, Santa Cruz; Department of Computer Science, University of California, Santa Cruz; Computer Science Department, Columbia University, New York", "aff_domain": "berkeley.edu;ucsc.edu;cs.columbia.edu", "email": "berkeley.edu;ucsc.edu;cs.columbia.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2ba8698b79439589fdd2b0f7218d8b07-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, Santa Cruz;Columbia University", "aff_unique_dep": "Department of Computer Science;Computer Science Department", "aff_unique_url": "https://www.ucsc.edu;https://www.columbia.edu", "aff_unique_abbr": "UCSC;Columbia", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Santa Cruz;New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Leveraging the Exact Likelihood of Deep Latent Variable Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11384", "id": "11384", "author_site": "Pierre-Alexandre Mattei, Jes Frellsen", "author": "Pierre-Alexandre Mattei; Jes Frellsen", "abstract": "Deep latent variable models (DLVMs) combine the approximation abilities of deep neural networks and the statistical foundations of generative models. Variational methods are commonly used for inference; however, the exact likelihood of these models has been largely overlooked. The purpose of this work is to study the general properties of this quantity and to show how they can be leveraged in practice. We focus on important inferential problems that rely on the likelihood: estimation and missing data imputation. First, we investigate maximum likelihood estimation for DLVMs: in particular, we show that most unconstrained models used for continuous data have an unbounded likelihood function. This problematic behaviour is demonstrated to be a source of mode collapse. We also show how to ensure the existence of maximum likelihood estimates, and draw useful connections with nonparametric mixture models. Finally, we describe an algorithm for missing data imputation using the exact conditional likelihood of a DLVM. On several data sets, our algorithm consistently and significantly outperforms the usual imputation scheme used for DLVMs.", "bibtex": "@inproceedings{NEURIPS2018_0609154f,\n author = {Mattei, Pierre-Alexandre and Frellsen, Jes},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Leveraging the Exact Likelihood of Deep Latent Variable Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0609154fa35b3194026346c9cac2a248-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0609154fa35b3194026346c9cac2a248-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0609154fa35b3194026346c9cac2a248-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0609154fa35b3194026346c9cac2a248-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0609154fa35b3194026346c9cac2a248-Reviews.html", "metareview": "", "pdf_size": 960444, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12363175245001363216&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, IT University of Copenhagen; Department of Computer Science, IT University of Copenhagen", "aff_domain": "itu.dk;itu.dk", "email": "itu.dk;itu.dk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0609154fa35b3194026346c9cac2a248-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "IT University of Copenhagen", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://itu.dk", "aff_unique_abbr": "ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "title": "Life-Long Disentangled Representation Learning with Cross-Domain Latent Homologies", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11937", "id": "11937", "author_site": "Alessandro Achille, Tom Eccles, Loic Matthey, Chris Burgess, Nicholas Watters, Alexander Lerchner, Irina Higgins", "author": "Alessandro Achille; Tom Eccles; Loic Matthey; Chris Burgess; Nicholas Watters; Alexander Lerchner; Irina Higgins", "abstract": "Intelligent behaviour in the real world requires the ability to acquire new knowledge from an ongoing sequence of experiences while preserving and reusing past knowledge. We propose a novel algorithm for unsupervised representation learning from piece-wise stationary visual data: Variational Autoencoder with Shared Embeddings (VASE). Based on the Minimum Description Length principle, VASE automatically detects shifts in the data distribution and allocates spare representational capacity to new knowledge, while simultaneously protecting previously learnt representations from catastrophic forgetting. Our approach encourages the learnt representations to be disentangled, which imparts a number of desirable properties: VASE can deal sensibly with ambiguous inputs, it can enhance its own representations through imagination-based exploration, and most importantly, it exhibits semantically meaningful sharing of latents between different datasets. Compared to baselines with entangled representations, our approach is able to reason beyond surface-level statistics and perform semantically meaningful cross-domain inference.", "bibtex": "@inproceedings{NEURIPS2018_a0afdf1a,\n author = {Achille, Alessandro and Eccles, Tom and Matthey, Loic and Burgess, Chris and Watters, Nicholas and Lerchner, Alexander and Higgins, Irina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Life-Long Disentangled Representation Learning with Cross-Domain Latent Homologies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a0afdf1ac166b8652ffe9dee6eac779e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a0afdf1ac166b8652ffe9dee6eac779e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a0afdf1ac166b8652ffe9dee6eac779e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a0afdf1ac166b8652ffe9dee6eac779e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a0afdf1ac166b8652ffe9dee6eac779e-Reviews.html", "metareview": "", "pdf_size": 5585874, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11424176286394196389&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "UCLA; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": "cs.ucla.edu;google.com;google.com;google.com;google.com;google.com;google.com", "email": "cs.ucla.edu;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a0afdf1ac166b8652ffe9dee6eac779e-Abstract.html", "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "University of California, Los Angeles;DeepMind", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;https://deepmind.com", "aff_unique_abbr": "UCLA;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "Lifelong Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11444", "id": "11444", "author_site": "Jorge Mendez, Shashank Shivkumar, Eric Eaton", "author": "Jorge Mendez; Shashank Shivkumar; Eric Eaton", "abstract": "Methods for learning from demonstration (LfD) have shown success in acquiring behavior policies by imitating a user. However, even for a single task, LfD may require numerous demonstrations. For versatile agents that must learn many tasks via demonstration, this process would substantially burden the user if each task were learned in isolation. To address this challenge, we introduce the novel problem of lifelong learning from demonstration, which allows the agent to continually build upon knowledge learned from previously demonstrated tasks to accelerate the learning of new tasks, reducing the amount of demonstrations required. As one solution to this problem, we propose the first lifelong learning approach to inverse reinforcement learning, which learns consecutive tasks via demonstration, continually transferring knowledge between tasks to improve performance.", "bibtex": "@inproceedings{NEURIPS2018_2d969e2c,\n author = {Mendez, Jorge and Shivkumar, Shashank and Eaton, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Lifelong Inverse Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2d969e2cee8cfa07ce7ca0bb13c7a36d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2d969e2cee8cfa07ce7ca0bb13c7a36d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2d969e2cee8cfa07ce7ca0bb13c7a36d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2d969e2cee8cfa07ce7ca0bb13c7a36d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2d969e2cee8cfa07ce7ca0bb13c7a36d-Reviews.html", "metareview": "", "pdf_size": 980096, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8930935480048739276&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer and Information Science, University of Pennsylvania; Department of Computer and Information Science, University of Pennsylvania; Department of Computer and Information Science, University of Pennsylvania", "aff_domain": "seas.upenn.edu;seas.upenn.edu;seas.upenn.edu", "email": "seas.upenn.edu;seas.upenn.edu;seas.upenn.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2d969e2cee8cfa07ce7ca0bb13c7a36d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "Department of Computer and Information Science", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Lifted Weighted Mini-Bucket", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11977", "id": "11977", "author_site": "Nicholas Gallo, Alexander Ihler", "author": "Nicholas Gallo; Alex Ihler", "abstract": "Many graphical models, such as Markov Logic Networks (MLNs) with evidence, possess highly symmetric substructures but no exact symmetries. Unfortunately, there are few principled methods that exploit these symmetric substructures to perform efficient approximate inference. In this paper, we present a lifted variant of the Weighted Mini-Bucket elimination algorithm which provides a principled way to (i) exploit the highly symmetric substructure of MLN models, and (ii) incorporate high-order inference terms which are necessary for high quality approximate inference. Our method has significant control over the accuracy-time trade-off of the approximation, allowing us to generate any-time approximations. Experimental results demonstrate the utility of this class of approximations, especially in models with strong repulsive potentials.", "bibtex": "@inproceedings{NEURIPS2018_bea6cfd5,\n author = {Gallo, Nicholas and Ihler, Alexander T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Lifted Weighted Mini-Bucket},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bea6cfd50b4f5e3c735a972cf0eb8450-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bea6cfd50b4f5e3c735a972cf0eb8450-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bea6cfd50b4f5e3c735a972cf0eb8450-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bea6cfd50b4f5e3c735a972cf0eb8450-Reviews.html", "metareview": "", "pdf_size": 1643866, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8387846070728269859&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "University of California Irvine; University of California Irvine", "aff_domain": "uci.edu;ics.uci.edu", "email": "uci.edu;ics.uci.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bea6cfd50b4f5e3c735a972cf0eb8450-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Limited Memory Kelley's Method Converges for Composite Convex and Submodular Objectives", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11436", "id": "11436", "author_site": "Song Zhou, Swati Gupta, Madeleine Udell", "author": "Song Zhou; Swati Gupta; Madeleine Udell", "abstract": "The original simplicial method (OSM), a variant of the classic Kelley\u2019s cutting plane method, has been shown to converge to the minimizer of a composite convex and submodular objective, though no rate of convergence for this method was known. Moreover, OSM is required to solve subproblems in each iteration whose size grows linearly in the number of iterations. We propose a limited memory version of Kelley\u2019s method (L-KM) and of OSM that requires limited memory (at most n+1 constraints for an n-dimensional problem) independent of the iteration. We prove convergence for L-KM when the convex part of the objective g is strongly convex and show it converges linearly when g is also smooth. Our analysis relies on duality between minimization of the composite convex and submodular objective and minimization of a convex function over the submodular base polytope. We introduce a limited memory version, L-FCFW, of the Fully-Corrective Frank-Wolfe (FCFW) method with approximate correction, to solve the dual problem. We show that L-FCFW and L-KM are dual algorithms that produce the same sequence of iterates; hence both converge linearly (when g is smooth and strongly convex) and with limited memory. We propose L-KM to minimize composite convex and submodular objectives; however, our results on L-FCFW hold for general polytopes and may be of independent interest.", "bibtex": "@inproceedings{NEURIPS2018_f8e59f4b,\n author = {Zhou, Song and Gupta, Swati and Udell, Madeleine},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Limited Memory Kelley\\textquotesingle s Method Converges for Composite Convex and Submodular Objectives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f8e59f4b2fe7c5705bf878bbd494ccdf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f8e59f4b2fe7c5705bf878bbd494ccdf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f8e59f4b2fe7c5705bf878bbd494ccdf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f8e59f4b2fe7c5705bf878bbd494ccdf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f8e59f4b2fe7c5705bf878bbd494ccdf-Reviews.html", "metareview": "", "pdf_size": 973870, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=310899504733118333&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Cornell University; Georgia Institute of Technology; Cornell University", "aff_domain": "cornell.edu;gatech.edu;cornell.edu", "email": "cornell.edu;gatech.edu;cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f8e59f4b2fe7c5705bf878bbd494ccdf-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.gatech.edu", "aff_unique_abbr": "Cornell;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Link Prediction Based on Graph Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11505", "id": "11505", "author_site": "Muhan Zhang, Yixin Chen", "author": "Muhan Zhang; Yixin Chen", "abstract": "Link prediction is a key problem for network-structured data. Link prediction heuristics use some score functions, such as common neighbors and Katz index, to measure the likelihood of links. They have obtained wide practical use due to their simplicity, interpretability, and for some of them, scalability. However, every heuristic has a strong assumption on when two nodes are likely to link, which limits its effectiveness on networks where these assumptions fail. In this regard, a more reasonable approach is to learn a suitable heuristic from a given network instead of using predefined ones. By extracting a local subgraph around each target link, we aim to learn a function mapping the subgraph patterns to link existence, thus automatically learning a ``heuristic'' that suits the current network. In this paper, we study this heuristic learning paradigm for link prediction. First, we develop a novel $\\gamma$-decaying heuristic theory. The theory unifies a wide range of heuristics in a single framework, and proves that all these heuristics can be well approximated from local subgraphs. Our results show that local subgraphs preserve rich information related to link existence. Second, based on the $\\gamma$-decaying theory, we propose a new method to learn heuristics from local subgraphs using a graph neural network (GNN).
Experimental results show unprecedented performance, with our method working consistently well on a wide range of problems.", "bibtex": "@inproceedings{NEURIPS2018_53f0d7c5,\n author = {Zhang, Muhan and Chen, Yixin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Link Prediction Based on Graph Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Reviews.html", "metareview": "", "pdf_size": 448931, "gs_citation": 2667, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11968553220977234326&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of CSE, Washington University in St. Louis; Department of CSE, Washington University in St. Louis", "aff_domain": "wustl.edu;cse.wustl.edu", "email": "wustl.edu;cse.wustl.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/53f0d7c537d99b3824f0f99d62ea2428-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "Department of CSE", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WashU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "LinkNet: Relational Embedding for Scene Graph", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11079", "id": "11079", "author_site": "Sanghyun Woo, Dahun Kim, Donghyeon Cho, In So Kweon", "author": "Sanghyun Woo; Dahun Kim; Donghyeon Cho; In So Kweon", "abstract": "Objects and their relationships are critical contents for image understanding. A scene graph provides a structured description that captures these properties of an image. However, reasoning about the relationships between objects is very challenging and only a few recent works have attempted to solve the problem of generating a scene graph from an image. In this paper, we present a novel method that improves scene graph generation by explicitly modeling inter-dependency among the entire object instances. We design a simple and effective relational embedding module that enables our model to jointly represent connections among all related objects, rather than focus on an object in isolation. Our novel method significantly benefits two main parts of the scene graph generation task: object classification and relationship classification. Using it on top of a basic Faster R-CNN, our model achieves state-of-the-art results on the Visual Genome benchmark. We further push the performance by introducing a global context encoding module and a geometrical layout encoding module.
We validate our final model, LinkNet, through extensive ablation studies, demonstrating its efficacy in scene graph generation.", "bibtex": "@inproceedings{NEURIPS2018_58238e9a,\n author = {Woo, Sanghyun and Kim, Dahun and Cho, Donghyeon and Kweon, In So},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {LinkNet: Relational Embedding for Scene Graph},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/58238e9ae2dd305d79c2ebc8c1883422-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/58238e9ae2dd305d79c2ebc8c1883422-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/58238e9ae2dd305d79c2ebc8c1883422-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/58238e9ae2dd305d79c2ebc8c1883422-Reviews.html", "metareview": "", "pdf_size": 2170880, "gs_citation": 179, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10453507458908677909&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "EE, KAIST; EE, KAIST; EE, KAIST; EE, KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;gmail.com;kaist.ac.kr", "email": "kaist.ac.kr;kaist.ac.kr;gmail.com;kaist.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/58238e9ae2dd305d79c2ebc8c1883422-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "KAIST", "aff_unique_dep": "Electrical Engineering", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Lipschitz regularity of deep neural networks: analysis and efficient estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11382", "id": "11382", "author_site": "Aladin Virmaux, Kevin Scaman", "author": "Aladin Virmaux; Kevin Scaman", "abstract": "Deep neural networks are notorious for being sensitive to small well-chosen perturbations, and estimating the regularity of such architectures is of utmost importance for safe and robust practical applications. In this paper, we investigate one of the key characteristics to assess the regularity of such methods: the Lipschitz constant of deep learning architectures. First, we show that, even for two-layer neural networks, the exact computation of this quantity is NP-hard and state-of-the-art methods may significantly overestimate it. Then, we both extend and improve previous estimation methods by providing AutoLip, the first generic algorithm for upper bounding the Lipschitz constant of any automatically differentiable function. We provide a power method algorithm working with automatic differentiation, allowing efficient computations even on large convolutions. Second, for sequential neural networks, we propose an improved algorithm named SeqLip that takes advantage of the linear computation graph to split the computation per pair of consecutive layers. Third, we propose heuristics on SeqLip in order to tackle very large networks. Our experiments show that SeqLip can significantly improve on the existing upper bounds.
Finally, we provide an implementation of AutoLip in the PyTorch environment that may be used to better estimate the robustness of a given neural network to small perturbations or regularize it using more precise Lipschitz estimations. These results also hint at the difficulty of estimating the Lipschitz constant of deep networks.", "bibtex": "@inproceedings{NEURIPS2018_d54e99a6,\n author = {Virmaux, Aladin and Scaman, Kevin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Lipschitz regularity of deep neural networks: analysis and efficient estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d54e99a6c03704e95e6965532dec148b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d54e99a6c03704e95e6965532dec148b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d54e99a6c03704e95e6965532dec148b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d54e99a6c03704e95e6965532dec148b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d54e99a6c03704e95e6965532dec148b-Reviews.html", "metareview": "", "pdf_size": 422319, "gs_citation": 676, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16196721810320018514&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Huawei Noah\u2019s Ark Lab; Huawei Noah\u2019s Ark Lab", "aff_domain": "huawei.com;huawei.com", "email": "huawei.com;huawei.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d54e99a6c03704e95e6965532dec148b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Noah\u2019s Ark Lab", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Lipschitz-Margin Training: Scalable Certification of Perturbation Invariance for Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11632", "id": "11632", "author_site": "Yusuke Tsuzuku, Issei Sato, Masashi Sugiyama", "author": "Yusuke Tsuzuku; Issei Sato; Masashi Sugiyama", "abstract": "High sensitivity of neural networks against malicious perturbations on inputs causes security concerns. To take a steady step towards robust classifiers, we aim to create neural network models provably defended from perturbations. Prior certification work requires strong assumptions on network structures and massive computational costs, and thus the range of its applications was limited. From the relationship between the Lipschitz constants and prediction margins, we present a computationally efficient calculation technique to lower-bound the size of adversarial perturbations that can deceive networks, and that is widely applicable to various complicated networks. Moreover, we propose an efficient training procedure that robustifies networks and significantly improves the provably guarded areas around data points.
In experimental evaluations, our method showed its ability to provide a non-trivial guarantee and enhance robustness even for large networks.", "bibtex": "@inproceedings{NEURIPS2018_48584348,\n author = {Tsuzuku, Yusuke and Sato, Issei and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Lipschitz-Margin Training: Scalable Certification of Perturbation Invariance for Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/485843481a7edacbfce101ecb1e4d2a8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/485843481a7edacbfce101ecb1e4d2a8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/485843481a7edacbfce101ecb1e4d2a8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/485843481a7edacbfce101ecb1e4d2a8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/485843481a7edacbfce101ecb1e4d2a8-Reviews.html", "metareview": "", "pdf_size": 526403, "gs_citation": 362, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17946280354894784321&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The University of Tokyo+RIKEN; The University of Tokyo+RIKEN; RIKEN+The University of Tokyo", "aff_domain": "ms.k.u-tokyo.ac.jp;k.u-tokyo.ac.jp;k.u-tokyo.ac.jp", "email": "ms.k.u-tokyo.ac.jp;k.u-tokyo.ac.jp;k.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/485843481a7edacbfce101ecb1e4d2a8-Abstract.html", "aff_unique_index": "0+1;0+1;1+0", "aff_unique_norm": "University of Tokyo;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.riken.jp", "aff_unique_abbr": "UTokyo;RIKEN", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "Japan" }, { "title": "Local Differential Privacy for Evolving Data", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11247", "id": "11247", "author_site": "Matthew Joseph, Aaron Roth, Jonathan Ullman, Bo Waggoner", "author": "Matthew Joseph; Aaron Roth; Jonathan Ullman; Bo Waggoner", "abstract": "There are now several large scale deployments of differential privacy used to collect statistical information about users. However, these deployments periodically recollect the data and recompute the statistics using algorithms designed for a single use. As a result, these systems do not provide meaningful privacy guarantees over long time scales. Moreover, existing techniques to mitigate this effect do not apply in the ``local model'' of differential privacy that these systems use.", "bibtex": "@inproceedings{NEURIPS2018_a0161022,\n author = {Joseph, Matthew and Roth, Aaron and Ullman, Jonathan and Waggoner, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Local Differential Privacy for Evolving Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a01610228fe998f515a72dd730294d87-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a01610228fe998f515a72dd730294d87-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a01610228fe998f515a72dd730294d87-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a01610228fe998f515a72dd730294d87-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a01610228fe998f515a72dd730294d87-Reviews.html", "metareview": "", "pdf_size": 311565, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13569105630598499966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Computer and Information Science, University of Pennsylvania; Computer and Information Science, University of Pennsylvania; Computer and Information Sciences, Northeastern University; Computer and Information Science, University of Pennsylvania", "aff_domain": "cis.upenn.edu;cis.upenn.edu;ccs.neu.edu;gmail.com", "email": "cis.upenn.edu;cis.upenn.edu;ccs.neu.edu;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a01610228fe998f515a72dd730294d87-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Pennsylvania;Northeastern University", "aff_unique_dep": "Computer and Information Science;Computer and Information Sciences", "aff_unique_url": "https://www.upenn.edu;https://www.northeastern.edu", "aff_unique_abbr": "UPenn;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Long short-term memory and Learning-to-learn in networks of spiking neurons", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11101", "id": "11101", "author_site": "Guillaume Bellec, Darjan Salaj, Anand Subramoney, Robert Legenstein, Wolfgang Maass", "author": "Guillaume Bellec; Darjan Salaj; Anand Subramoney; Robert Legenstein; Wolfgang Maass", "abstract": "Recurrent networks of spiking neurons (RSNNs) underlie the astounding computing and learning capabilities of the brain. But computing and learning capabilities of RSNN models have remained poor, at least in comparison with ANNs. We address two possible reasons for that. One is that RSNNs in the brain are not randomly connected or designed according to simple rules, and they do not start learning as a tabula rasa network. Rather, RSNNs in the brain were optimized for their tasks through evolution, development, and prior experience. Details of these optimization processes are largely unknown. But their functional contribution can be approximated through powerful optimization methods, such as backpropagation through time (BPTT).", "bibtex": "@inproceedings{NEURIPS2018_c203d8a1,\n author = {Bellec, Guillaume and Salaj, Darjan and Subramoney, Anand and Legenstein, Robert and Maass, Wolfgang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Long short-term memory and Learning-to-learn in networks of spiking neurons},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c203d8a151612acf12457e4d67635a95-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c203d8a151612acf12457e4d67635a95-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c203d8a151612acf12457e4d67635a95-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c203d8a151612acf12457e4d67635a95-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c203d8a151612acf12457e4d67635a95-Reviews.html", "metareview": "", "pdf_size": 1922142, "gs_citation": 650, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6363505648515888175&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Institute for Theoretical Computer Science, Graz University of Technology, Austria; Institute for Theoretical Computer Science, Graz University of Technology, Austria; Institute for Theoretical Computer Science, Graz University of Technology, Austria; Institute for Theoretical Computer Science, Graz University of Technology, Austria; Institute for Theoretical Computer Science, Graz University of Technology, Austria", "aff_domain": "igi.tugraz.at;igi.tugraz.at;igi.tugraz.at;igi.tugraz.at;igi.tugraz.at", "email": "igi.tugraz.at;igi.tugraz.at;igi.tugraz.at;igi.tugraz.at;igi.tugraz.at", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c203d8a151612acf12457e4d67635a95-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Graz University of Technology", "aff_unique_dep": "Institute for Theoretical Computer Science", "aff_unique_url": "https://www.tugraz.at", "aff_unique_abbr": "TU Graz", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Graz", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Austria" }, { "title": "Loss Functions for Multiset Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11563", "id": "11563", "author_site": "Sean Welleck, Zixin Yao, Yu Gai, Jialin Mao, Zheng Zhang, Kyunghyun Cho", "author": "Sean Welleck; Zixin Yao; Yu Gai; Jialin Mao; Zheng Zhang; Kyunghyun Cho", "abstract": "We study the problem of multiset prediction. The goal of multiset prediction is to train a predictor that maps an input to a multiset consisting of multiple items. Unlike existing problems in supervised learning, such as classification, ranking and sequence generation, there is no known order among items in a target multiset, and each item in the multiset may appear more than once, making this problem extremely challenging. In this paper, we propose a novel multiset loss function by viewing this problem from the perspective of sequential decision making. The proposed multiset loss function is empirically evaluated on two families of datasets, one synthetic and the other real, with varying levels of difficulty, against various baseline loss functions including reinforcement learning, sequence, and aggregated distribution matching loss functions. 
The experiments reveal the effectiveness of the proposed loss function over the others.", "bibtex": "@inproceedings{NEURIPS2018_fb3f7685,\n author = {Welleck, Sean and Yao, Zixin and Gai, Yu and Mao, Jialin and Zhang, Zheng and Cho, Kyunghyun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Loss Functions for Multiset Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fb3f76858cb38e5b7fd113e0bc1c0721-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fb3f76858cb38e5b7fd113e0bc1c0721-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fb3f76858cb38e5b7fd113e0bc1c0721-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fb3f76858cb38e5b7fd113e0bc1c0721-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fb3f76858cb38e5b7fd113e0bc1c0721-Reviews.html", "metareview": "", "pdf_size": 348694, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=179679515432814537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "New York University Shanghai; New York University; CIFAR Azrieli Global Scholar; New York University Shanghai; New York University; New York University", "aff_domain": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "email": "nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu;nyu.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fb3f76858cb38e5b7fd113e0bc1c0721-Abstract.html", "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "New York University;CIFAR", "aff_unique_dep": ";Azrieli Global Scholar", "aff_unique_url": "https://www.nyu.edu;https://www.cifar.ca", "aff_unique_abbr": "NYU;CIFAR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shanghai;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Loss Surfaces, Mode Connectivity, and Fast Ensembling of DNNs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11839", "id": "11839", "author_site": "Timur Garipov, Pavel Izmailov, Dmitrii Podoprikhin, Dmitry Vetrov, Andrew Wilson", "author": "Timur Garipov; Pavel Izmailov; Dmitrii Podoprikhin; Dmitry P Vetrov; Andrew G Wilson", "abstract": "The loss functions of deep neural networks are complex and their geometric properties are not well understood. We show that the optima of these complex loss functions are in fact connected by simple curves, over which training and test accuracy are nearly constant. We introduce a training procedure to discover these high-accuracy pathways between modes. Inspired by this new geometric insight, we also propose a new ensembling method entitled Fast Geometric Ensembling (FGE). Using FGE we can train high-performing ensembles in the time required to train a single model. We achieve improved performance compared to the recent state-of-the-art Snapshot Ensembles, on CIFAR-10, CIFAR-100, and ImageNet.", "bibtex": "@inproceedings{NEURIPS2018_be3087e7,\n author = {Garipov, Timur and Izmailov, Pavel and Podoprikhin, Dmitrii and Vetrov, Dmitry P and Wilson, Andrew G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Loss Surfaces, Mode Connectivity, and Fast Ensembling of DNNs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Reviews.html", "metareview": "", "pdf_size": 474395, "gs_citation": 885, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7857512178594187445&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Samsung AI Center in Moscow + Skolkovo Institute of Science and Technology; Cornell University; Samsung-HSE Laboratory, National Research University Higher School of Economics; National Research University Higher School of Economics; Cornell University", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/be3087e74e9100d4bc4c6268cdbe8456-Abstract.html", "aff_unique_index": "0+1;2;3;3;2", "aff_unique_norm": "Samsung;Skolkovo Institute of Science and Technology;Cornell University;National Research University Higher School of Economics", "aff_unique_dep": "AI Center;;;Samsung-HSE Laboratory", "aff_unique_url": "https://www.samsung.com/global/innovation/ai-research/;https://www.skoltech.ru;https://www.cornell.edu;https://hse.ru", "aff_unique_abbr": "Samsung AI;Skoltech;Cornell;HSE", "aff_campus_unique_index": "0", "aff_campus_unique": "Moscow;", "aff_country_unique_index": "0+0;1;0;0;1", "aff_country_unique": "Russian Federation;United States" }, { "title": "Low-Rank Tucker Decomposition of Large Tensors Using TensorSketch", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11957", "id": "11957", "author_site": "Osman Asif Malik, Stephen Becker", "author": "Osman Asif Malik; Stephen Becker", "abstract": "We propose two randomized algorithms for low-rank Tucker decomposition of tensors. The algorithms, which incorporate sketching, only require a single pass of the input tensor and can handle tensors whose elements are streamed in any order. To the best of our knowledge, ours are the only algorithms which can do this. We test our algorithms on sparse synthetic data and compare them to multiple other methods. We also apply one of our algorithms to a real dense 38 GB tensor representing a video and use the resulting decomposition to correctly classify frames containing disturbances.", "bibtex": "@inproceedings{NEURIPS2018_45a766fa,\n author = {Malik, Osman Asif and Becker, Stephen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Low-Rank Tucker Decomposition of Large Tensors Using TensorSketch},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/45a766fa266ea2ebeb6680fa139d2a3d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/45a766fa266ea2ebeb6680fa139d2a3d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/45a766fa266ea2ebeb6680fa139d2a3d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/45a766fa266ea2ebeb6680fa139d2a3d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/45a766fa266ea2ebeb6680fa139d2a3d-Reviews.html", "metareview": "", "pdf_size": 1389676, "gs_citation": 146, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14930463506395433719&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Applied Mathematics, University of Colorado Boulder; Department of Applied Mathematics, University of Colorado Boulder", "aff_domain": "colorado.edu;colorado.edu", "email": "colorado.edu;colorado.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/45a766fa266ea2ebeb6680fa139d2a3d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Colorado Boulder", "aff_unique_dep": "Department of Applied Mathematics", "aff_unique_url": "https://www.colorado.edu", "aff_unique_abbr": "CU Boulder", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Boulder", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Low-rank Interaction with Sparse Additive Effects Model for Large Data Frames", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11537", "id": "11537", "author_site": "Genevi\u00e8ve Robin, Hoi-To Wai, Julie Josse, Olga Klopp, Eric Moulines", "author": "Genevi\u00e8ve Robin; Hoi-To Wai; Julie Josse; Olga Klopp; Eric Moulines", "abstract": "Many applications of machine learning involve the analysis of large data frames -- matrices collecting heterogeneous measurements (binary, numerical, counts, etc.) across samples -- with missing values. Low-rank models, as studied by Udell et al. (2016), are popular in this framework for tasks such as visualization, clustering and missing value imputation. Yet, available methods with statistical guarantees and efficient optimization do not allow explicit modeling of main additive effects such as row and column, or covariate effects. In this paper, we introduce a low-rank interaction and sparse additive effects (LORIS) model which combines matrix regression on a dictionary and low-rank design, to estimate main effects and interactions simultaneously. We provide statistical guarantees in the form of upper bounds on the estimation error of both components. Then, we introduce a mixed coordinate gradient descent (MCGD) method which provably converges sub-linearly to an optimal solution and is computationally efficient for large scale data sets. We show on simulated and survey data that the method has a clear advantage over current practices.", "bibtex": "@inproceedings{NEURIPS2018_f21e255f,\n author = {Robin, Genevi\\`{e}ve and Wai, Hoi-To and Josse, Julie and Klopp, Olga and Moulines, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Low-rank Interaction with Sparse Additive Effects Model for Large Data Frames},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f21e255f89e0f258accbe4e984eef486-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f21e255f89e0f258accbe4e984eef486-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f21e255f89e0f258accbe4e984eef486-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f21e255f89e0f258accbe4e984eef486-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f21e255f89e0f258accbe4e984eef486-Reviews.html", "metareview": "", "pdf_size": 430358, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8183639508841966766&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Centre de Math\u00e9matiques Appliqu\u00e9es \u00c9cole Polytechnique, XPOP, INRIA; Department of SE&EM The Chinese University of Hong Kong; Centre de Math\u00e9matiques Appliqu\u00e9es \u00c9cole Polytechnique, XPOP, INRIA; ESSEC Business School CREST, ENSAE; Centre de Math\u00e9matiques Appliqu\u00e9es \u00c9cole Polytechnique, XPOP, INRIA", "aff_domain": "polytechnique.edu;se.cuhk.edu.hk;polytechnique.edu;essec.edu;polytechnique.edu", "email": "polytechnique.edu;se.cuhk.edu.hk;polytechnique.edu;essec.edu;polytechnique.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f21e255f89e0f258accbe4e984eef486-Abstract.html", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Ecole Polytechnique;Chinese University of Hong Kong;ESSEC Business School", "aff_unique_dep": "Centre de Math\u00e9matiques Appliqu\u00e9es;Department of SE&EM;CREST", "aff_unique_url": "https://www.polytechnique.edu;https://www.cuhk.edu.hk;https://www.essec.edu", "aff_unique_abbr": "X;CUHK;ESSEC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "France;China" }, { "title": "Low-shot Learning via Covariance-Preserving Adversarial Augmentation Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11118", "id": "11118", "author_site": "Hang Gao, Zheng Shou, Alireza Zareian, Hanwang Zhang, Shih-Fu Chang", "author": "Hang Gao; Zheng Shou; Alireza Zareian; Hanwang Zhang; Shih-Fu Chang", "abstract": "Deep neural networks suffer from over-fitting and catastrophic forgetting when trained with small data. One natural remedy for this problem is data augmentation, which has been recently shown to be effective. However, previous works either assume that intra-class variances can always be generalized to new classes, or employ naive generation methods to hallucinate finite examples without modeling their latent distributions. In this work, we propose Covariance-Preserving Adversarial Augmentation Networks to overcome existing limits of low-shot learning. Specifically, a novel Generative Adversarial Network is designed to model the latent distribution of each novel class given its related base counterparts. Since direct estimation on novel classes can be inductively biased, we explicitly preserve covariance information as the ``variability'' of base examples during the generation process. 
Empirical results show that our model can generate realistic yet diverse examples, leading to substantial improvements on the ImageNet benchmark over the state of the art.", "bibtex": "@inproceedings{NEURIPS2018_81448138,\n author = {Gao, Hang and Shou, Zheng and Zareian, Alireza and Zhang, Hanwang and Chang, Shih-Fu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Low-shot Learning via Covariance-Preserving Adversarial Augmentation Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/81448138f5f163ccdba4acc69819f280-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/81448138f5f163ccdba4acc69819f280-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/81448138f5f163ccdba4acc69819f280-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/81448138f5f163ccdba4acc69819f280-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/81448138f5f163ccdba4acc69819f280-Reviews.html", "metareview": "", "pdf_size": 2642515, "gs_citation": 182, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17105477081465006147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Columbia University; Columbia University; Columbia University; Nanyang Technological University; Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;ntu.edu.sg;columbia.edu", "email": "columbia.edu;columbia.edu;columbia.edu;ntu.edu.sg;columbia.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/81448138f5f163ccdba4acc69819f280-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Columbia University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "Columbia;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "M-Walk: Learning to Walk over Graphs using Monte Carlo Tree Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11655", "id": "11655", "author_site": "Yelong Shen, Jianshu Chen, Po-Sen Huang, Yuqing Guo, Jianfeng Gao", "author": "Yelong Shen; Jianshu Chen; Po-Sen Huang; Yuqing Guo; Jianfeng Gao", "abstract": "Learning to walk over a graph towards a target node for a given query and a source node is an important problem in applications such as knowledge base completion (KBC). It can be formulated as a reinforcement learning (RL) problem with a known state transition model. To overcome the challenge of sparse rewards, we develop a graph-walking agent called M-Walk, which consists of a deep recurrent neural network (RNN) and Monte Carlo Tree Search (MCTS). The RNN encodes the state (i.e., history of the walked path) and maps it separately to a policy and Q-values. In order to effectively train the agent from sparse rewards, we combine MCTS with the neural policy to generate trajectories yielding more positive rewards. From these trajectories, the network is improved in an off-policy manner using Q-learning, which modifies the RNN policy via parameter sharing. 
Our proposed RL algorithm repeatedly applies this policy-improvement step to learn the model. At test time, MCTS is combined with the neural policy to predict the target node. Experimental results on several graph-walking benchmarks show that M-Walk is able to learn better policies than other RL-based methods, which are mainly based on policy gradients. M-Walk also outperforms traditional KBC baselines.", "bibtex": "@inproceedings{NEURIPS2018_c6f798b8,\n author = {Shen, Yelong and Chen, Jianshu and Huang, Po-Sen and Guo, Yuqing and Gao, Jianfeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {M-Walk: Learning to Walk over Graphs using Monte Carlo Tree Search},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c6f798b844366ccd65d99bc7f31e0e02-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c6f798b844366ccd65d99bc7f31e0e02-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c6f798b844366ccd65d99bc7f31e0e02-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c6f798b844366ccd65d99bc7f31e0e02-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c6f798b844366ccd65d99bc7f31e0e02-Reviews.html", "metareview": "", "pdf_size": 2826696, "gs_citation": 190, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15536314796264337400&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Tencent AI Lab, Bellevue, WA, USA; Tencent AI Lab, Bellevue, WA, USA; Microsoft Research, Redmond, WA, USA + DeepMind; Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA", "aff_domain": "tencent.com;tencent.com;google.com;microsoft.com;microsoft.com", "email": "tencent.com;tencent.com;google.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c6f798b844366ccd65d99bc7f31e0e02-Abstract.html", "aff_unique_index": "0;0;1+2;1;1", "aff_unique_norm": "Tencent;Microsoft;DeepMind", "aff_unique_dep": "AI Lab;Microsoft Research;", "aff_unique_url": "https://ai.tencent.com;https://www.microsoft.com/en-us/research;https://deepmind.com", "aff_unique_abbr": "Tencent AI Lab;MSR;DeepMind", "aff_campus_unique_index": "0;0;1;1;1", "aff_campus_unique": "Bellevue;Redmond;", "aff_country_unique_index": "0;0;0+1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "MULAN: A Blind and Off-Grid Method for Multichannel Echo Retrieval", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11229", "id": "11229", "author_site": "Helena Peic Tukuljac, Antoine Deleforge, Remi Gribonval", "author": "Helena Peic Tukuljac; Antoine Deleforge; Remi Gribonval", "abstract": "This paper addresses the general problem of blind echo retrieval, i.e., given M sensors measuring in the discrete-time domain M mixtures of K delayed and attenuated copies of an unknown source signal, can the echo location and weights be recovered? This problem has broad applications in fields such as sonars, seismology, ultrasounds or room acoustics. It belongs to the broader class of blind channel identification problems, which have been intensively studied in signal processing. 
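For the M-Walk record above, the way a neural policy prior steers tree search can be pictured with a PUCT-style selection rule. The `node`/`child` interface and the constant are hypothetical; the paper's exact selection formula and Q-learning update are in the linked PDF.

```python
import math

def puct_select(node, c_puct=1.0):
    """Choose the next edge to walk: Q(s,a) plus an exploration bonus that
    mixes the RNN policy's prior with visit counts, biasing search toward
    actions the neural policy already scores highly."""
    total_visits = sum(ch.visits for ch in node.children.values())
    def score(child):
        u = c_puct * child.prior * math.sqrt(total_visits + 1) / (1 + child.visits)
        return child.q + u
    return max(node.children.items(), key=lambda kv: score(kv[1]))[0]
```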
All existing methods proceed in two steps: (i) blind estimation of sparse discrete-time filters and (ii) echo information retrieval by peak picking. The precision of these methods is fundamentally limited by the rate at which the signals are sampled: estimated echo locations are necessarily on-grid, and since true locations never match the sampling grid, the weight estimation precision is also strongly limited. This is the so-called basis-mismatch problem in compressed sensing. We propose a radically different approach to the problem, building on top of the framework of finite-rate-of-innovation sampling. The approach operates directly in the parameter-space of echo locations and weights, and enables near-exact blind and off-grid echo retrieval from discrete-time measurements. It is shown to outperform conventional methods by several orders of magnitude in precision.", "bibtex": "@inproceedings{NEURIPS2018_c9f95a0a,\n author = {Peic Tukuljac, Helena and Deleforge, Antoine and Gribonval, Remi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MULAN: A Blind and Off-Grid Method for Multichannel Echo Retrieval},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c9f95a0a5af052bffce5c89917335f67-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c9f95a0a5af052bffce5c89917335f67-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c9f95a0a5af052bffce5c89917335f67-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c9f95a0a5af052bffce5c89917335f67-Reviews.html", "metareview": "", "pdf_size": 413475, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=88608764706264858&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer and Communication Sciences, \u00c9cole polytechnique f\u00e9d\u00e9rale de Lausanne; Universit\u00e9 de Lorraine, CNRS, Inria, LORIA; Univ Rennes, Inria, CNRS, IRISA", "aff_domain": "epfl.ch;inria.fr;inria.fr", "email": "epfl.ch;inria.fr;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c9f95a0a5af052bffce5c89917335f67-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "\u00c9cole polytechnique f\u00e9d\u00e9rale de Lausanne;Universit\u00e9 de Lorraine;University of Rennes", "aff_unique_dep": "Department of Computer and Communication Sciences;;", "aff_unique_url": "https://www.epfl.ch;https://www.univ-lorraine.fr;https://www.univ-rennes1.fr", "aff_unique_abbr": "EPFL;UL;Univ Rennes", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;France" }, { "title": "MacNet: Transferring Knowledge from Machine Comprehension to Sequence-to-Sequence Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11591", "id": "11591", "author_site": "Boyuan Pan, Yazheng Yang, Hao Li, Zhou Zhao, Yueting Zhuang, Deng Cai, Xiaofei He", "author": "Boyuan Pan; Yazheng Yang; Hao Li; Zhou Zhao; Yueting Zhuang; Deng Cai; Xiaofei He", "abstract": "Machine Comprehension (MC) is one of the core problems in natural language processing, requiring both understanding of the natural language and knowledge about the world. 
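For the MULAN record above, the basis-mismatch limitation of on-grid methods is visible in two lines of arithmetic; the sampling rate and delay below are illustrative numbers, not the paper's experimental settings.

```python
import numpy as np

# Basis mismatch in miniature: a true echo delay that falls between samples
# can never be represented exactly by an on-grid (peak-picking) estimate.
fs = 16000.0                       # sampling rate in Hz (illustrative)
true_delay = 1.234e-3              # seconds, deliberately off the grid
on_grid = np.round(true_delay * fs) / fs
print(abs(true_delay - on_grid))   # irreducible error of any on-grid method
```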
Rapid progress has been made since the release of several benchmark datasets, and recently state-of-the-art models even surpass human performance on the well-known SQuAD evaluation. In this paper, we transfer knowledge learned from machine comprehension to sequence-to-sequence tasks to deepen the understanding of the text. We propose MacNet: a novel encoder-decoder supplementary architecture to the widely used attention-based sequence-to-sequence models. Experiments on neural machine translation (NMT) and abstractive text summarization show that our proposed framework can significantly improve the performance of the baseline models, and our method for abstractive text summarization achieves state-of-the-art results on the Gigaword dataset.", "bibtex": "@inproceedings{NEURIPS2018_908c9a56,\n author = {Pan, Boyuan and Yang, Yazheng and Li, Hao and Zhao, Zhou and Zhuang, Yueting and Cai, Deng and He, Xiaofei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MacNet: Transferring Knowledge from Machine Comprehension to Sequence-to-Sequence Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/908c9a564a86426585b29f5335b619bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/908c9a564a86426585b29f5335b619bc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/908c9a564a86426585b29f5335b619bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/908c9a564a86426585b29f5335b619bc-Reviews.html", "metareview": "", "pdf_size": 485614, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=777559980806250763&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "State Key Lab of CAD &CG, Zhejiang University + Alibaba-Zhejiang University Joint Institute of Frontier Technologies; College of Computer Science, Zhejiang University; State Key Lab of CAD &CG, Zhejiang University + Alibaba-Zhejiang University Joint Institute of Frontier Technologies; College of Computer Science, Zhejiang University; College of Computer Science, Zhejiang University; State Key Lab of CAD &CG, Zhejiang University + Alibaba-Zhejiang University Joint Institute of Frontier Technologies; Fabu Inc., Hangzhou, China", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;fabu.ai", "email": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;fabu.ai", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/908c9a564a86426585b29f5335b619bc-Abstract.html", "aff_unique_index": "0+0;0;0+0;0;0;0+0;1", "aff_unique_norm": "Zhejiang University;Fabu Inc.", "aff_unique_dep": "State Key Lab of CAD &CG;", "aff_unique_url": "http://www.zju.edu.cn;", "aff_unique_abbr": "ZJU;", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0+0;0;0;0+0;0", "aff_country_unique": "China" }, { "title": "Mallows Models for Top-k Lists", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11433", "id": "11433", "author_site": "Flavio Chierichetti, Anirban Dasgupta, Shahrzad Haddadan, Ravi Kumar, Silvio Lattanzi", "author": "Flavio Chierichetti; Anirban Dasgupta; Shahrzad Haddadan; Ravi Kumar; Silvio Lattanzi", "abstract": "The classic 
Mallows model is a widely-used tool to realize distributions on permutations. Motivated by common practical situations, in this paper, we generalize Mallows to model distributions on top-k lists by using a suitable distance measure between top-k lists. Unlike many earlier works, our model is both analytically tractable and computationally efficient. We demonstrate this by studying two basic problems in this model, namely, sampling and reconstruction, from both algorithmic and experimental points of view.", "bibtex": "@inproceedings{NEURIPS2018_a381c2c3,\n author = {Chierichetti, Flavio and Dasgupta, Anirban and Haddadan, Shahrzad and Kumar, Ravi and Lattanzi, Silvio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mallows Models for Top-k Lists},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a381c2c35c9157f6b67fd07d5a200ae1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a381c2c35c9157f6b67fd07d5a200ae1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a381c2c35c9157f6b67fd07d5a200ae1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a381c2c35c9157f6b67fd07d5a200ae1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a381c2c35c9157f6b67fd07d5a200ae1-Reviews.html", "metareview": "", "pdf_size": 536954, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4729466617385906235&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Sapienza University, Rome, Italy; IIT, Gandhinagar, India; Sapienza University, Rome, Italy; Google, Mountain View, CA; Google, Zurich, Switzerland", "aff_domain": "di.uniroma1.it;gmail.com;uniroma1.it;gmail.com;gmail.com", "email": "di.uniroma1.it;gmail.com;uniroma1.it;gmail.com;gmail.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a381c2c35c9157f6b67fd07d5a200ae1-Abstract.html", "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "Sapienza University;Indian Institute of Technology Gandhinagar;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.uniroma1.it;https://www.iitgn.ac.in;https://www.google.com", "aff_unique_abbr": "Sapienza;IITGN;Google", "aff_campus_unique_index": "0;1;0;2;3", "aff_campus_unique": "Rome;Gandhinagar;Mountain View;Zurich", "aff_country_unique_index": "0;1;0;2;3", "aff_country_unique": "Italy;India;United States;Switzerland" }, { "title": "Manifold Structured Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11547", "id": "11547", "author_site": "Alessandro Rudi, Carlo Ciliberto, Gian Maria Marconi, Lorenzo Rosasco", "author": "Alessandro Rudi; Carlo Ciliberto; GianMaria Marconi; Lorenzo Rosasco", "abstract": "Structured prediction provides a general framework to deal with supervised problems where the outputs have semantically rich structure. While classical approaches consider finite, albeit potentially huge, output spaces, in this paper we discuss how structured prediction can be extended to a continuous scenario. Specifically, we study a structured prediction approach to manifold-valued regression. 
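For the Mallows record above, the classic permutation case can be sampled by repeated insertion; a sketch for intuition only, since the paper's top-k generalization replaces the distance measure and needs its own sampler.

```python
import random

def sample_mallows(n, phi):
    """Repeated-insertion sampler for the classic Mallows model with
    dispersion phi in (0, 1]: item i is inserted at position j with weight
    phi**(i - j), so mass concentrates on the identity ranking as phi -> 0."""
    perm = []
    for i in range(n):
        weights = [phi ** (i - j) for j in range(i + 1)]
        j = random.choices(range(i + 1), weights=weights)[0]
        perm.insert(j, i)
    return perm
```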
We characterize a class of problems for which the considered approach is statistically consistent and study how geometric optimization can be used to compute the corresponding estimator. Promising experimental results on both simulated and real data complete our study.", "bibtex": "@inproceedings{NEURIPS2018_f6185f0e,\n author = {Rudi, Alessandro and Ciliberto, Carlo and Marconi, GianMaria and Rosasco, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Manifold Structured Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f6185f0ef02dcaec414a3171cd01c697-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f6185f0ef02dcaec414a3171cd01c697-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f6185f0ef02dcaec414a3171cd01c697-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f6185f0ef02dcaec414a3171cd01c697-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f6185f0ef02dcaec414a3171cd01c697-Reviews.html", "metareview": "", "pdf_size": 518457, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3063976344008945098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f6185f0ef02dcaec414a3171cd01c697-Abstract.html" }, { "title": "Manifold-tiling Localized Receptive Fields are Optimal in Similarity-preserving Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11682", "id": "11682", "author_site": "Anirvan Sengupta, Cengiz Pehlevan, Mariano Tepper, Alexander Genkin, Dmitri Chklovskii", "author": "Anirvan Sengupta; Cengiz Pehlevan; Mariano Tepper; Alexander Genkin; Dmitri Chklovskii", "abstract": "Many neurons in the brain, such as place cells in the rodent hippocampus, have localized receptive fields, i.e., they respond to a small neighborhood of stimulus space. What is the functional significance of such representations and how can they arise? Here, we propose that localized receptive fields emerge in similarity-preserving networks of rectifying neurons that learn low-dimensional manifolds populated by sensory inputs. Numerical simulations of such networks on standard datasets yield manifold-tiling localized receptive fields. More generally, we show analytically that, for data lying on symmetric manifolds, optimal solutions of objectives, from which similarity-preserving networks are derived, have localized receptive fields. Therefore, nonnegative similarity-preserving mapping (NSM) implemented by neural networks can model representations of continuous manifolds in the brain.", "bibtex": "@inproceedings{NEURIPS2018_ee14c41e,\n author = {Sengupta, Anirvan and Pehlevan, Cengiz and Tepper, Mariano and Genkin, Alexander and Chklovskii, Dmitri},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Manifold-tiling Localized Receptive Fields are Optimal in Similarity-preserving Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ee14c41e92ec5c97b54cf9b74e25bd99-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ee14c41e92ec5c97b54cf9b74e25bd99-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ee14c41e92ec5c97b54cf9b74e25bd99-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ee14c41e92ec5c97b54cf9b74e25bd99-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ee14c41e92ec5c97b54cf9b74e25bd99-Reviews.html", "metareview": "", "pdf_size": 5146488, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1758414387739465296&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Rutgers University + Flatiron Institute; Flatiron Institute; Flatiron Institute; NYU Langone Medical Center; Flatiron Institute + NYU Langone Medical Center", "aff_domain": "physics.rutgers.edu;flatironinstitute.org;flatironinstitute.org;gmail.com;flatironinstitute.org", "email": "physics.rutgers.edu;flatironinstitute.org;flatironinstitute.org;gmail.com;flatironinstitute.org", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ee14c41e92ec5c97b54cf9b74e25bd99-Abstract.html", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Rutgers University;Flatiron Institute;NYU Langone Medical Center", "aff_unique_dep": ";;", "aff_unique_url": "https://www.rutgers.edu;https://flatironinstitute.org;https://nyulangone.org", "aff_unique_abbr": "Rutgers;Flatiron;NYU Langone", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mapping Images to Scene Graphs with Permutation-Invariant Structured Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11694", "id": "11694", "author_site": "Roei Herzig, Moshiko Raboh, Gal Chechik, Jonathan Berant, Amir Globerson", "author": "Roei Herzig; Moshiko Raboh; Gal Chechik; Jonathan Berant; Amir Globerson", "abstract": "Machine understanding of complex images is a key goal of artificial intelligence. One challenge underlying this task is that visual scenes contain multiple inter-related objects, and that global context plays an important role in interpreting the scene. A natural modeling framework for capturing such effects is structured prediction, which optimizes over complex labels, while modeling within-label interactions. However, it is unclear what principles should guide the design of a structured prediction model that utilizes the power of deep learning components. Here we propose a design principle for such architectures that follows from a natural requirement of permutation invariance. We prove a necessary and sufficient characterization for architectures that follow this invariance, and discuss its implication on model design. 
Finally, we show that the resulting model achieves new state-of-the-art results on the Visual Genome scene graph labeling benchmark, outperforming all recent approaches.", "bibtex": "@inproceedings{NEURIPS2018_2668a710,\n author = {Herzig, Roei and Raboh, Moshiko and Chechik, Gal and Berant, Jonathan and Globerson, Amir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mapping Images to Scene Graphs with Permutation-Invariant Structured Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2668a7105966cae6e23901495176b8f9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2668a7105966cae6e23901495176b8f9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2668a7105966cae6e23901495176b8f9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2668a7105966cae6e23901495176b8f9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2668a7105966cae6e23901495176b8f9-Reviews.html", "metareview": "", "pdf_size": 2058072, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10299834729999374704&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Tel Aviv University; Tel Aviv University; Bar-Ilan University + NVIDIA Research; Tel Aviv University + AI2; Tel Aviv University", "aff_domain": "mail.tau.ac.il;mail.tau.ac.il;biu.ac.il;cs.tau.ac.il;post.tau.ac.il", "email": "mail.tau.ac.il;mail.tau.ac.il;biu.ac.il;cs.tau.ac.il;post.tau.ac.il", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2668a7105966cae6e23901495176b8f9-Abstract.html", "aff_unique_index": "0;0;1+2;0+3;0", "aff_unique_norm": "Tel Aviv University;Bar-Ilan University;NVIDIA;AI2", "aff_unique_dep": ";;NVIDIA Research;", "aff_unique_url": "https://www.tau.ac.il;https://www.biu.ac.il;https://www.nvidia.com/research;https://www.ai2.edu", "aff_unique_abbr": "TAU;BIU;NVIDIA;AI2", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1;0+1;0", "aff_country_unique": "Israel;United States" }, { "title": "Masking: A New Perspective of Noisy Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11568", "id": "11568", "author_site": "Bo Han, Jiangchao Yao, Gang Niu, Mingyuan Zhou, Ivor Tsang, Ya Zhang, Masashi Sugiyama", "author": "Bo Han; Jiangchao Yao; Gang Niu; Mingyuan Zhou; Ivor Tsang; Ya Zhang; Masashi Sugiyama", "abstract": "It is important to learn various types of classifiers given training data with noisy labels. Noisy labels, in the most popular noise model hitherto, are corrupted from ground-truth labels by an unknown noise transition matrix. Thus, by estimating this matrix, classifiers can escape from overfitting those noisy labels. However, such estimation is practically difficult, due to either the indirect nature of two-step approaches, or data that is not big enough to afford end-to-end approaches. In this paper, we propose a human-assisted approach called ''Masking'' that conveys human cognition of invalid class transitions and naturally speculates the structure of the noise transition matrix. 
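For the scene-graph record above, the permutation requirement is met by architectures that aggregate pairwise terms with an order-independent sum, roughly rho(z_i, sum_j phi(z_i, z_j)). The layer below is one minimal instance of that form; layer sizes and names are invented.

```python
import torch
import torch.nn as nn

class PermInvariantGraphLayer(nn.Module):
    """Sum-aggregated pairwise features: relabeling the n objects permutes
    the input rows and, because the sum over j ignores order, permutes the
    output rows identically, so downstream labeling is permutation-consistent."""
    def __init__(self, d):
        super().__init__()
        self.phi = nn.Sequential(nn.Linear(2 * d, d), nn.ReLU())
        self.rho = nn.Sequential(nn.Linear(2 * d, d), nn.ReLU())

    def forward(self, z):  # z: (n, d) per-object features
        n, d = z.shape
        pair = torch.cat([z.unsqueeze(1).expand(n, n, d),
                          z.unsqueeze(0).expand(n, n, d)], dim=-1)
        agg = self.phi(pair).sum(dim=1)          # (n, d), order-independent
        return self.rho(torch.cat([z, agg], dim=-1))
```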
To this end, we derive a structure-aware probabilistic model incorporating a structure prior, and solve the challenges of structure extraction and structure alignment. Thanks to Masking, we only estimate unmasked noise transition probabilities and the burden of estimation is tremendously reduced. We conduct extensive experiments on CIFAR-10 and CIFAR-100 with three noise structures as well as the industrial-level Clothing1M with agnostic noise structure, and the results show that Masking can improve the robustness of classifiers significantly.", "bibtex": "@inproceedings{NEURIPS2018_aee92f16,\n author = {Han, Bo and Yao, Jiangchao and Niu, Gang and Zhou, Mingyuan and Tsang, Ivor and Zhang, Ya and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Masking: A New Perspective of Noisy Supervision},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aee92f16efd522b9326c25cc3237ac15-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aee92f16efd522b9326c25cc3237ac15-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aee92f16efd522b9326c25cc3237ac15-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aee92f16efd522b9326c25cc3237ac15-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aee92f16efd522b9326c25cc3237ac15-Reviews.html", "metareview": "", "pdf_size": 2109997, "gs_citation": 304, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10612946092230113975&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "https://github.com/bhanML/Masking", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aee92f16efd522b9326c25cc3237ac15-Abstract.html" }, { "title": "Maximizing Induced Cardinality Under a Determinantal Point Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11666", "id": "11666", "author_site": "Jennifer Gillenwater, Alex Kulesza, Sergei Vassilvitskii, Zelda Mariet", "author": "Jennifer A Gillenwater; Alex Kulesza; Sergei Vassilvitskii; Zelda E. Mariet", "abstract": "Determinantal point processes (DPPs) are well-suited to recommender systems where the goal is to generate collections of diverse, high-quality items. In the existing literature this is usually formulated as finding the mode of the DPP (the so-called MAP set). However, the MAP objective inherently assumes that the DPP models \"optimal\" recommendation sets, and yet obtaining such a DPP is nontrivial when there is no ready source of example optimal sets. In this paper we advocate an alternative framework for applying DPPs to recommender systems. Our approach assumes that the DPP simply models user engagements with recommended items, which is more consistent with how DPPs for recommender systems are typically trained. With this assumption, we are able to formulate a metric that measures the expected number of items that a user will engage with. We formalize the optimization of this metric as the Maximum Induced Cardinality (MIC) problem. 
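For the Masking record above: noise-transition-matrix methods plug an estimated T into a forward correction of the training loss. The sketch below is that generic forward-corrected objective, not the paper's structure-aware probabilistic model itself; Masking's contribution is constraining which entries of T may be nonzero.

```python
import torch

def forward_corrected_nll(logits, noisy_labels, T):
    """Forward correction: push clean class posteriors through a noise
    transition matrix T (row i = distribution of noisy labels given clean
    class i) before taking the negative log-likelihood on noisy labels."""
    p_clean = torch.softmax(logits, dim=1)       # (B, C)
    p_noisy = p_clean @ T                        # mix according to transitions
    return -torch.log(p_noisy.gather(1, noisy_labels[:, None]) + 1e-12).mean()
```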
Although the MIC objective is not submodular, we show that it can be approximated by a submodular function, and that empirically it is well-optimized by a greedy algorithm.", "bibtex": "@inproceedings{NEURIPS2018_758be1f9,\n author = {Gillenwater, Jennifer A and Kulesza, Alex and Vassilvitskii, Sergei and Mariet, Zelda E.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximizing Induced Cardinality Under a Determinantal Point Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/758be1f9f7a7efac938ed8bd97c0e1cb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/758be1f9f7a7efac938ed8bd97c0e1cb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/758be1f9f7a7efac938ed8bd97c0e1cb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/758be1f9f7a7efac938ed8bd97c0e1cb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/758be1f9f7a7efac938ed8bd97c0e1cb-Reviews.html", "metareview": "", "pdf_size": 418177, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=896873493236288697&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Google Research NYC; Google Research NYC; Massachusetts Institute of Technology; Google Research NYC", "aff_domain": "google.com;google.com;csail.mit.edu;google.com", "email": "google.com;google.com;csail.mit.edu;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/758be1f9f7a7efac938ed8bd97c0e1cb-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://web.mit.edu", "aff_unique_abbr": "Google Research;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "New York City;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Maximizing acquisition functions for Bayesian optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11938", "id": "11938", "author_site": "James Wilson, Frank Hutter, Marc Deisenroth", "author": "James Wilson; Frank Hutter; Marc Deisenroth", "abstract": "Bayesian optimization is a sample-efficient approach to global optimization that relies on theoretically motivated value heuristics (acquisition functions) to guide its search process. Fully maximizing acquisition functions produces the Bayes' decision rule, but this ideal is difficult to achieve since these functions are frequently non-trivial to optimize. This statement is especially true when evaluating queries in parallel, where acquisition functions are routinely non-convex, high-dimensional, and intractable. We first show that acquisition functions estimated via Monte Carlo integration are consistently amenable to gradient-based optimization. 
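For the MIC record above: if the user's engagements Y follow a DPP with kernel L restricted to the recommended set S, then E[|Y|] = tr(L_S (L_S + I)^{-1}). A brute-force greedy sketch of maximizing this induced cardinality (function names illustrative, no attempt at the paper's efficiency tricks):

```python
import numpy as np

def greedy_mic(L, k):
    """Greedily grow S to maximize f(S) = tr(L_S (L_S + I)^{-1}), the
    expected number of engaged items under DPP(L_S)."""
    n = L.shape[0]
    S = []
    def f(idx):
        Ls = L[np.ix_(idx, idx)]
        return np.trace(Ls @ np.linalg.inv(Ls + np.eye(len(idx))))
    for _ in range(k):
        gains = [(f(S + [j]) - (f(S) if S else 0.0), j)
                 for j in range(n) if j not in S]
        S.append(max(gains)[1])
    return S
```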
Subsequently, we identify a common family of acquisition functions, including EI and UCB, whose characteristics not only facilitate but justify use of greedy approaches for their maximization.", "bibtex": "@inproceedings{NEURIPS2018_498f2c21,\n author = {Wilson, James and Hutter, Frank and Deisenroth, Marc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximizing acquisition functions for Bayesian optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/498f2c21688f6451d9f5fd09d53edda7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/498f2c21688f6451d9f5fd09d53edda7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/498f2c21688f6451d9f5fd09d53edda7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/498f2c21688f6451d9f5fd09d53edda7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/498f2c21688f6451d9f5fd09d53edda7-Reviews.html", "metareview": "", "pdf_size": 6085858, "gs_citation": 332, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11738793638453145569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Imperial College London; University of Freiburg; Imperial College London+PROWLER.io", "aff_domain": "imperial.ac.uk; ; ", "email": "imperial.ac.uk; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/498f2c21688f6451d9f5fd09d53edda7-Abstract.html", "aff_unique_index": "0;1;0+2", "aff_unique_norm": "Imperial College London;University of Freiburg;PROWLER.io", "aff_unique_dep": ";;", "aff_unique_url": "https://www.imperial.ac.uk;https://www.uni-freiburg.de;https://prowler.io", "aff_unique_abbr": "ICL;UoF;PROWLER.io", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0+0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Maximum Causal Tsallis Entropy Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11435", "id": "11435", "author_site": "Kyungjae Lee, Sungjoon Choi, Songhwai Oh", "author": "Kyungjae Lee; Sungjoon Choi; Songhwai Oh", "abstract": "In this paper, we propose a novel maximum causal Tsallis entropy (MCTE) framework for imitation learning which can efficiently learn a sparse multi-modal policy distribution from demonstrations. We provide the full mathematical analysis of the proposed framework. First, the optimal solution of an MCTE problem is shown to be a sparsemax distribution, whose supporting set can be adjusted. \nThe proposed method has advantages over a softmax distribution in that it can exclude unnecessary actions by assigning zero probability. Second, we prove that an MCTE problem is equivalent to robust Bayes estimation in the sense of the Brier score. Third, we propose a maximum causal Tsallis entropy imitation learning\n(MCTEIL) algorithm with a sparse mixture density network (sparse MDN) by modeling mixture weights using a sparsemax distribution. In particular, we show that the causal Tsallis entropy of an MDN encourages exploration and efficient mixture utilization while Boltzmann Gibbs entropy is less effective. 
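For the acquisition-function record above, the Monte Carlo estimator that makes parallel EI amenable to gradient-based optimization looks as follows. A sketch using the reparameterization trick, assuming the GP posterior mean and a Cholesky factor at the q query points are given; names are illustrative.

```python
import torch

def mc_qei(mean, chol, best_f, n_samples=256):
    """Monte Carlo parallel EI: E[max_j (y_j - best_f)_+] under the joint
    posterior N(mean, chol chol^T). Written with reparameterized samples,
    so the estimate is differentiable in mean/chol and hence (by the chain
    rule) in the query locations."""
    q = mean.shape[0]
    eps = torch.randn(n_samples, q)
    ys = mean + eps @ chol.T                      # posterior samples, (S, q)
    improvement = (ys - best_f).clamp(min=0).max(dim=1).values
    return improvement.mean()
```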
We validate the proposed method in two simulation studies and MCTEIL outperforms existing imitation learning methods in terms of average returns and learning multi-modal policies.", "bibtex": "@inproceedings{NEURIPS2018_2596a54c,\n author = {Lee, Kyungjae and Choi, Sungjoon and Oh, Songhwai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximum Causal Tsallis Entropy Imitation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2596a54cdbb555cfd09cd5d991da0f55-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2596a54cdbb555cfd09cd5d991da0f55-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2596a54cdbb555cfd09cd5d991da0f55-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2596a54cdbb555cfd09cd5d991da0f55-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2596a54cdbb555cfd09cd5d991da0f55-Reviews.html", "metareview": "", "pdf_size": 1266381, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8953814724495687475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Dep. of Electrical and Computer Engineering and ASRI, Seoul National University; Kakao Brain; Dep. of Electrical and Computer Engineering and ASRI, Seoul National University", "aff_domain": "rllab.snu.ac.kr;kakaobrain.com;snu.ac.kr", "email": "rllab.snu.ac.kr;kakaobrain.com;snu.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2596a54cdbb555cfd09cd5d991da0f55-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Seoul National University;Kakao Brain", "aff_unique_dep": "Department of Electrical and Computer Engineering;", "aff_unique_url": "https://www.snu.ac.kr;https://brain.kakao.com", "aff_unique_abbr": "SNU;Kakao Brain", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seoul;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Maximum-Entropy Fine Grained Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11086", "id": "11086", "author_site": "Abhimanyu Dubey, Otkrist Gupta, Ramesh Raskar, Nikhil Naik", "author": "Abhimanyu Dubey; Otkrist Gupta; Ramesh Raskar; Nikhil Naik", "abstract": "Fine-Grained Visual Classification (FGVC) is an important computer vision problem that involves small diversity within the different classes, and often requires expert annotators to collect data. Utilizing this notion of small visual diversity, we revisit Maximum-Entropy learning in the context of fine-grained classification, and provide a training routine that maximizes the entropy of the output probability distribution for training convolutional neural networks on FGVC tasks. We provide a theoretical as well as empirical justification of our approach, and achieve state-of-the-art performance across a variety of classification tasks in FGVC, that can potentially be extended to any fine-tuning task. 
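For the MCTE record above, the optimal policy is stated to be a sparsemax distribution; below is the standard sparsemax projection of Martins and Astudillo (2016), which, unlike softmax, returns exact zeros for low-scoring actions.

```python
import numpy as np

def sparsemax(z):
    """Euclidean projection of a score vector z onto the probability
    simplex; actions below the threshold tau get probability exactly 0."""
    z_sorted = np.sort(z)[::-1]
    k = np.arange(1, len(z) + 1)
    cumsum = np.cumsum(z_sorted)
    support = z_sorted + (1 - cumsum) / k > 0
    k_star = k[support][-1]
    tau = (cumsum[k_star - 1] - 1) / k_star
    return np.maximum(z - tau, 0.0)

print(sparsemax(np.array([2.0, 0.0, -1.0])))  # -> [1. 0. 0.], fully sparse
```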
Our method is robust to different hyperparameter values, amount of training data and amount of training label noise and can hence be a valuable tool in many similar problems.", "bibtex": "@inproceedings{NEURIPS2018_0c74b7f7,\n author = {Dubey, Abhimanyu and Gupta, Otkrist and Raskar, Ramesh and Naik, Nikhil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximum-Entropy Fine Grained Classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0c74b7f78409a4022a2c4c5a5ca3ee19-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0c74b7f78409a4022a2c4c5a5ca3ee19-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0c74b7f78409a4022a2c4c5a5ca3ee19-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0c74b7f78409a4022a2c4c5a5ca3ee19-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0c74b7f78409a4022a2c4c5a5ca3ee19-Reviews.html", "metareview": "", "pdf_size": 1303432, "gs_citation": 241, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6237195009703834993&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0c74b7f78409a4022a2c4c5a5ca3ee19-Abstract.html" }, { "title": "Mean Field for the Stochastic Blockmodel: Optimization Landscape and Convergence Issues", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12011", "id": "12011", "author_site": "Soumendu Sundar Mukherjee, Purnamrita Sarkar, Y. X. Rachel Wang, Bowei Yan", "author": "Soumendu Sundar Mukherjee; Purnamrita Sarkar; Y. X. Rachel Wang; Bowei Yan", "abstract": "Variational approximation has been widely used in large-scale Bayesian inference recently, the simplest kind of which involves imposing a mean field assumption to approximate complicated latent structures. Despite the computational scalability of mean field, theoretical studies of its loss function surface and the convergence behavior of iterative updates for optimizing the loss are far from complete. In this paper, we focus on the problem of community detection for a simple two-class Stochastic Blockmodel (SBM). Using batch co-ordinate ascent (BCAVI) for updates, we give a complete characterization of all the critical points and show different convergence behaviors with respect to initializations. When the parameters are known, we show a significant proportion of random initializations will converge to ground truth. On the other hand, when the parameters themselves need to be estimated, a random initialization will converge to an uninformative local optimum.", "bibtex": "@inproceedings{NEURIPS2018_dbb240d2,\n author = {Mukherjee, Soumendu Sundar and Sarkar, Purnamrita and Wang, Y. X. Rachel and Yan, Bowei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
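For the Maximum-Entropy fine-grained record above, the training routine amounts to cross-entropy minus an entropy bonus on the predictive distribution. A sketch; the trade-off weight gamma is an illustrative hyperparameter, not a value from the paper.

```python
import torch
import torch.nn.functional as F

def max_ent_loss(logits, targets, gamma=0.1):
    """Cross-entropy plus a reward for high output entropy, discouraging
    over-confident predictions on visually similar fine-grained classes."""
    ce = F.cross_entropy(logits, targets)
    p = F.softmax(logits, dim=1)
    entropy = -(p * torch.log(p + 1e-12)).sum(dim=1).mean()
    return ce - gamma * entropy
```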
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mean Field for the Stochastic Blockmodel: Optimization Landscape and Convergence Issues},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dbb240d23ce3d732b67bcfbae5956b18-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dbb240d23ce3d732b67bcfbae5956b18-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dbb240d23ce3d732b67bcfbae5956b18-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dbb240d23ce3d732b67bcfbae5956b18-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dbb240d23ce3d732b67bcfbae5956b18-Reviews.html", "metareview": "", "pdf_size": 687093, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14165991272071282597&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Interdisciplinary Statistical Research Unit (ISRU), Indian Statistical Institute, Kolkata; Department of Statistics and Data Science, University of Texas, Austin; School of Mathematics and Statistics, University of Sydney; Department of Statistics and Data Science, University of Texas, Austin", "aff_domain": "gmail.com;austin.utexas.edu;sydney.edu.au;utexas.edu", "email": "gmail.com;austin.utexas.edu;sydney.edu.au;utexas.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dbb240d23ce3d732b67bcfbae5956b18-Abstract.html", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Indian Statistical Institute;University of Texas at Austin;University of Sydney", "aff_unique_dep": "Interdisciplinary Statistical Research Unit (ISRU);Department of Statistics and Data Science;School of Mathematics and Statistics", "aff_unique_url": "https://www.isical.ac.in;https://www.utexas.edu;https://www.sydney.edu.au", "aff_unique_abbr": "ISI Kolkata;UT Austin;USYD", "aff_campus_unique_index": "0;1;2;1", "aff_campus_unique": "Kolkata;Austin;Sydney", "aff_country_unique_index": "0;1;2;1", "aff_country_unique": "India;United States;Australia" }, { "title": "Mean-field theory of graph neural networks in graph partitioning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11431", "id": "11431", "author_site": "Tatsuro Kawamoto, Masashi Tsubaki, Tomoyuki Obuchi", "author": "Tatsuro Kawamoto; Masashi Tsubaki; Tomoyuki Obuchi", "abstract": "A theoretical performance analysis of the graph neural network (GNN) is presented. For classification tasks, the neural network approach has the advantage in terms of flexibility that it can be employed in a data-driven manner, whereas Bayesian inference requires the assumption of a specific model. A fundamental question is then whether GNN has a high accuracy in addition to this flexibility. Moreover, whether the achieved performance is predominately a result of the backpropagation or the architecture itself is a matter of considerable interest. To gain a better insight into these questions, a mean-field theory of a minimal GNN architecture is developed for the graph partitioning problem. This demonstrates a good agreement with numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_f6e794a7,\n author = {Kawamoto, Tatsuro and Tsubaki, Masashi and Obuchi, Tomoyuki},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
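For the stochastic blockmodel record above, the BCAVI iteration in the known-parameter, two-class case is a short fixed-point update. A vectorized sketch (binary adjacency with zero diagonal assumed; the j = i self-terms are ignored for brevity):

```python
import numpy as np

def bcavi_update(A, psi, p, q):
    """One batch coordinate-ascent VI step for a symmetric two-class SBM
    with within-class probability p and between-class probability q;
    psi[i] is the current probability that node i is in class 1."""
    t = np.log(p / q)                    # evidence weight of an edge
    u = np.log((1 - p) / (1 - q))        # evidence weight of a non-edge
    s = 2 * psi - 1                      # E[label] in the {-1, +1} coding
    field = (t * A + u * (1 - A)) @ s
    return 1.0 / (1.0 + np.exp(-field))  # sigmoid of the mean field
```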
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mean-field theory of graph neural networks in graph partitioning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f6e794a75c5d51de081dbefa224304f9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f6e794a75c5d51de081dbefa224304f9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f6e794a75c5d51de081dbefa224304f9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f6e794a75c5d51de081dbefa224304f9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f6e794a75c5d51de081dbefa224304f9-Reviews.html", "metareview": "", "pdf_size": 378310, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16208932554208741784&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Artificial Intelligence Research Center, National Institute of Advanced Industrial Science and Technology, 2-3-26 Aomi, Koto-ku, Tokyo, Japan; Artificial Intelligence Research Center, National Institute of Advanced Industrial Science and Technology, 2-3-26 Aomi, Koto-ku, Tokyo, Japan; Department of Mathematical and Computing Science, Tokyo Institute of Technology, 2-12-1 Ookayama Meguro-ku Tokyo, Japan", "aff_domain": "aist.go.jp;aist.go.jp;c.titech.ac.jp", "email": "aist.go.jp;aist.go.jp;c.titech.ac.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f6e794a75c5d51de081dbefa224304f9-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "National Institute of Advanced Industrial Science and Technology;Tokyo Institute of Technology", "aff_unique_dep": "Artificial Intelligence Research Center;Department of Mathematical and Computing Science", "aff_unique_url": "https://www.aist.go.jp;https://www.titech.ac.jp", "aff_unique_abbr": "AIST;Titech", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Tokyo;Ookayama", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Measures of distortion for machine learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11479", "id": "11479", "author_site": "Leena Chennuru Vankadara, Ulrike von Luxburg", "author": "Leena Chennuru Vankadara; Ulrike von Luxburg", "abstract": "Given data from a general metric space, one of the standard machine learning pipelines is to first embed the data into a Euclidean space and subsequently apply out of the box machine learning algorithms to analyze the data. The quality of such an embedding is typically described in terms of a distortion measure. In this paper, we show that many of the existing distortion measures behave in an undesired way, when considered from a machine learning point of view. We investigate desirable properties of distortion measures and formally prove that most of the existing measures fail to satisfy these properties. These theoretical findings are supported by simulations, which for example demonstrate that existing distortion measures are not robust to noise or outliers and cannot serve as good indicators for classification accuracy. As an alternative, we suggest a new measure of distortion, called $\sigma$-distortion. 
We can show both in theory and in experiments that it satisfies all desirable properties and is a better candidate to evaluate distortion in the context of machine learning.", "bibtex": "@inproceedings{NEURIPS2018_4c5bcfec,\n author = {Chennuru Vankadara, Leena and von Luxburg, Ulrike},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Measures of distortion for machine learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bcfec8584af0d967f1ab10179ca4b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bcfec8584af0d967f1ab10179ca4b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bcfec8584af0d967f1ab10179ca4b-Reviews.html", "metareview": "", "pdf_size": 503410, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=807660579247080240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of T\u00fcbingen + Max Planck Institute for Intelligent Systems, T\u00fcbingen; University of T\u00fcbingen + Max Planck Institute for Intelligent Systems, T\u00fcbingen", "aff_domain": "tuebingen.mpg.de;informatik.uni-tuebingen.de", "email": "tuebingen.mpg.de;informatik.uni-tuebingen.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4c5bcfec8584af0d967f1ab10179ca4b-Abstract.html", "aff_unique_index": "0+1;0+1", "aff_unique_norm": "University of T\u00fcbingen;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Uni T\u00fcbingen;MPI-IS", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0+0;0+0", "aff_country_unique": "Germany" }, { "title": "Memory Augmented Policy Optimization for Program Synthesis and Semantic Parsing", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11948", "id": "11948", "author_site": "Chen Liang, Mohammad Norouzi, Jonathan Berant, Quoc V Le, Ni Lao", "author": "Chen Liang; Mohammad Norouzi; Jonathan Berant; Quoc V Le; Ni Lao", "abstract": "We present Memory Augmented Policy Optimization (MAPO), a simple and novel way to leverage a memory buffer of promising trajectories to reduce the variance of policy gradient estimate. MAPO is applicable to deterministic environments with discrete actions, such as structured prediction and combinatorial optimization tasks. We express the expected return objective as a weighted sum of two terms: an\nexpectation over the high-reward trajectories inside the memory buffer, and a separate expectation over trajectories outside the buffer. To make an efficient algorithm of MAPO, we propose: (1) memory weight clipping to accelerate and stabilize training; (2) systematic exploration to discover high-reward trajectories; (3) distributed sampling from inside and outside of the memory buffer to scale up training. MAPO improves the sample efficiency and robustness of policy gradient, especially on tasks with sparse rewards. 
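For the distortion-measures record above, the contrast between classical worst-case distortion and a spread-of-ratios statistic is easy to compute. This sketch is in the spirit of $\sigma$-distortion but does not reproduce the paper's exact normalization; all names are illustrative and points are assumed distinct.

```python
import numpy as np
from scipy.spatial.distance import pdist

def distortion_statistics(X, Y):
    """Expansion ratios r_ij = d(y_i, y_j) / d(x_i, x_j) over all pairs of
    an embedding X -> Y. Worst-case distortion is max(r)/min(r), dominated
    by single outlier pairs; a variance over mean-normalized ratios is the
    kind of average-case alternative the paper advocates."""
    r = pdist(Y) / pdist(X)
    worst_case = r.max() / r.min()
    r_normalized = r / r.mean()
    return worst_case, np.var(r_normalized)
```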
We evaluate MAPO on weakly supervised program synthesis from natural language (semantic parsing). On the WikiTableQuestions benchmark, we improve the state-of-the-art by 2.6%, achieving an accuracy of 46.3%. On the WikiSQL benchmark, MAPO achieves an accuracy of 74.9% with only weak supervision, outperforming several strong baselines with full supervision. Our source code is available at https://goo.gl/TXBp4e", "bibtex": "@inproceedings{NEURIPS2018_f4e369c0,\n author = {Liang, Chen and Norouzi, Mohammad and Berant, Jonathan and Le, Quoc V and Lao, Ni},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Memory Augmented Policy Optimization for Program Synthesis and Semantic Parsing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f4e369c0a468d3aeeda0593ba90b5e55-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f4e369c0a468d3aeeda0593ba90b5e55-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f4e369c0a468d3aeeda0593ba90b5e55-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f4e369c0a468d3aeeda0593ba90b5e55-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f4e369c0a468d3aeeda0593ba90b5e55-Reviews.html", "metareview": "", "pdf_size": 1390008, "gs_citation": 148, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4398387474099067788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Google Brain; Google Brain; Tel-Aviv University, AI2; Google Brain; SayMosaic Inc.", "aff_domain": "gmail.com;google.com;cs.tau.ac.il;google.com;mosaix.ai", "email": "gmail.com;google.com;cs.tau.ac.il;google.com;mosaix.ai", "github": "", "project": "goo.gl/TXBp4e", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f4e369c0a468d3aeeda0593ba90b5e55-Abstract.html", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Google;Tel-Aviv University;SayMosaic", "aff_unique_dep": "Google Brain;AI2;", "aff_unique_url": "https://brain.google.com;https://www.tau.ac.il;", "aff_unique_abbr": "Google Brain;TAU;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Israel" }, { "title": "Memory Replay GANs: Learning to Generate New Categories without Forgetting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11579", "id": "11579", "author_site": "Chenshen Wu, Luis Herranz, Xialei Liu, yaxing wang, Joost van de Weijer, Bogdan Raducanu", "author": "Chenshen Wu; Luis Herranz; Xialei Liu; yaxing wang; Joost van de Weijer; Bogdan Raducanu", "abstract": "Previous works on sequential learning address the problem of forgetting in discriminative models. In this paper we consider the case of generative models. In particular, we investigate generative adversarial networks (GANs) in the task of learning new categories in a sequential fashion. We first show that sequential fine tuning renders the network unable to properly generate images from previous categories (i.e. forgetting). Addressing this problem, we propose Memory Replay GANs (MeRGANs), a conditional GAN framework that integrates a memory replay generator. 
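The two-term decomposition in the MAPO record above is simple enough to state in a few lines. A minimal sketch, assuming trajectory probabilities and returns are already computed; the clip value and function name are illustrative, not taken from the paper.

```python
def mapo_return(buffer_probs, buffer_returns, outside_estimate, clip=0.1):
    """Expected return as a weighted sum of an exact enumeration over the
    memory buffer of high-reward trajectories and a sampled estimate over
    trajectories outside it. Flooring the buffer mass (memory weight
    clipping) keeps the buffer term influential early in training."""
    pi_b = sum(buffer_probs)                     # policy mass on the buffer
    w = max(pi_b, clip)                          # memory weight clipping
    inside = sum(p * r for p, r in zip(buffer_probs, buffer_returns))
    inside = inside / pi_b if pi_b > 0 else 0.0  # renormalized buffer term
    return w * inside + (1.0 - w) * outside_estimate
```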
We study two methods to prevent forgetting by leveraging these replays, namely joint training with replay and replay alignment. Qualitative and quantitative experimental results in MNIST, SVHN and LSUN datasets show that our memory replay approach can generate competitive images while significantly mitigating the forgetting of previous categories.", "bibtex": "@inproceedings{NEURIPS2018_a57e8915,\n author = {Wu, Chenshen and Herranz, Luis and Liu, Xialei and wang, yaxing and van de Weijer, Joost and Raducanu, Bogdan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Memory Replay GANs: Learning to Generate New Categories without Forgetting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a57e8915461b83adefb011530b711704-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a57e8915461b83adefb011530b711704-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a57e8915461b83adefb011530b711704-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a57e8915461b83adefb011530b711704-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a57e8915461b83adefb011530b711704-Reviews.html", "metareview": "", "pdf_size": 2825338, "gs_citation": 540, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10386986757383440246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Computer Vision Center; Computer Vision Center; Computer Vision Center; Computer Vision Center; Computer Vision Center; Computer Vision Center", "aff_domain": "cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es", "email": "cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es;cvc.uab.es", "github": "https://github.com/WuChenshen/MeRGAN", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a57e8915461b83adefb011530b711704-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Computer Vision Center", "aff_unique_dep": "", "aff_unique_url": "https://www.cvc.uab.cat/", "aff_unique_abbr": "CVC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Spain" }, { "title": "Mental Sampling in Multimodal Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11560", "id": "11560", "author_site": "Jianqiao Zhu, Adam Sanborn, Nick Chater", "author": "Jianqiao Zhu; Adam Sanborn; Nick Chater", "abstract": "Both resources in the natural environment and concepts in a semantic space are distributed \"patchily\", with large gaps in between the patches. To describe people's internal and external foraging behavior, various random walk models have been proposed. In particular, internal foraging has been modeled as sampling: in order to gather relevant information for making a decision, people draw samples from a mental representation using random-walk algorithms such as Markov chain Monte Carlo (MCMC). However, two common empirical observations argue against people using simple sampling algorithms such as MCMC for internal foraging. First, the distance between samples is often best described by a Levy flight distribution: the probability of the distance between two successive locations follows a power-law on the distances. 
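Joint training with replay, one of the two methods named in the MeRGAN record above, can be pictured as follows. The conditional generator interface G_prev(z, c) and the z_dim argument are assumptions for illustration, not the paper's API.

```python
import torch

def replay_batch(G_prev, z_dim, old_categories, n):
    """A frozen snapshot of the previous generator 'replays' samples of
    already-learned categories, which get mixed with real data for the new
    category so the current GAN does not forget them."""
    idx = torch.randint(len(old_categories), (n,))
    c = old_categories[idx]                # labels of old categories, tensor
    z = torch.randn(n, z_dim)
    with torch.no_grad():
        x = G_prev(z, c)                   # replayed images, no grads needed
    return x, c
```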
Second, humans and other animals produce long-range, slowly decaying autocorrelations characterized as 1/f-like fluctuations, instead of the 1/f^2 fluctuations produced by random walks. We propose that mental sampling is not done by simple MCMC, but is instead adapted to multimodal representations and is implemented by Metropolis-coupled Markov chain Monte Carlo (MC3), one of the first algorithms developed for sampling from multimodal distributions. MC3 involves running multiple Markov chains in parallel but with target distributions of different temperatures, and it swaps the states of the chains whenever a better location is found. Heated chains more readily traverse valleys in the probability landscape to propose moves to far-away peaks, while the colder chains make the local steps that explore the current peak or patch. We show that MC3 generates distances between successive samples that follow a Levy flight distribution and produce 1/f-like autocorrelations, providing a single mechanistic account of these two puzzling empirical phenomena of internal foraging.", "bibtex": "@inproceedings{NEURIPS2018_b4a721cf,\n author = {Zhu, Jianqiao and Sanborn, Adam and Chater, Nick},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mental Sampling in Multimodal Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b4a721cfb62f5d19ec61575114d8a2d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b4a721cfb62f5d19ec61575114d8a2d1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b4a721cfb62f5d19ec61575114d8a2d1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b4a721cfb62f5d19ec61575114d8a2d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b4a721cfb62f5d19ec61575114d8a2d1-Reviews.html", "metareview": "", "pdf_size": 975034, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4975639582495631592&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Psychology, University of Warwick; Department of Psychology, University of Warwick; Behavioural Science Group, Warwick Business School", "aff_domain": "warwick.ac.uk;warwick.ac.uk;wbs.ac.uk", "email": "warwick.ac.uk;warwick.ac.uk;wbs.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b4a721cfb62f5d19ec61575114d8a2d1-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Warwick;Warwick Business School", "aff_unique_dep": "Department of Psychology;Behavioural Science Group", "aff_unique_url": "https://www.warwick.ac.uk;https://www.wbs.ac.uk", "aff_unique_abbr": "Warwick;WBS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Mesh-TensorFlow: Deep Learning for Supercomputers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11985", "id": "11985", "author_site": "Noam Shazeer, Youlong Cheng, Niki Parmar, Dustin Tran, Ashish Vaswani, Penporn Koanantakool, Peter Hawkins, HyoukJoong Lee, Mingsheng Hong, Cliff Young, Ryan Sepassi, Blake Hechtman", "author": "Noam Shazeer; Youlong Cheng; Niki Parmar; Dustin Tran; Ashish Vaswani; Penporn 
Koanantakool; Peter Hawkins; HyoukJoong Lee; Mingsheng Hong; Cliff Young; Ryan Sepassi; Blake Hechtman", "abstract": "Batch-splitting (data-parallelism) is the dominant distributed Deep Neural Network (DNN) training strategy, due to its universal applicability and its amenability to Single-Program-Multiple-Data (SPMD) programming. However, batch-splitting suffers from problems including the inability to train very large models (due to memory constraints), high latency, and inefficiency at small batch sizes. All of these can be solved by more general distribution strategies (model-parallelism). Unfortunately, efficient model-parallel algorithms tend to be complicated to discover, describe, and implement, particularly on large clusters. We introduce Mesh-TensorFlow, a language for specifying a general class of distributed tensor computations. Where data-parallelism can be viewed as splitting tensors and operations along the \"batch\" dimension, in Mesh-TensorFlow, the user can specify any tensor-dimensions to be split across any dimensions of a multi-dimensional mesh of processors. A Mesh-TensorFlow graph compiles into an SPMD program consisting of parallel operations coupled with collective communication primitives such as Allreduce. We use Mesh-TensorFlow to implement an efficient data-parallel, model-parallel version of the Transformer sequence-to-sequence model. Using TPU meshes of up to 512 cores, we train Transformer models with up to 5 billion parameters, surpassing SOTA results on the WMT'14 English-to-French translation task and the one-billion-word language modeling benchmark. Mesh-TensorFlow is available at https://github.com/tensorflow/mesh", "bibtex": "@inproceedings{NEURIPS2018_3a37abde,\n author = {Shazeer, Noam and Cheng, Youlong and Parmar, Niki and Tran, Dustin and Vaswani, Ashish and Koanantakool, Penporn and Hawkins, Peter and Lee, HyoukJoong and Hong, Mingsheng and Young, Cliff and Sepassi, Ryan and Hechtman, Blake},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mesh-TensorFlow: Deep Learning for Supercomputers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3a37abdeefe1dab1b30f7c5c7e581b93-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3a37abdeefe1dab1b30f7c5c7e581b93-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3a37abdeefe1dab1b30f7c5c7e581b93-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3a37abdeefe1dab1b30f7c5c7e581b93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3a37abdeefe1dab1b30f7c5c7e581b93-Reviews.html", "metareview": "", "pdf_size": 234289, "gs_citation": 468, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1887735754811341119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "https://github.com/tensorflow/mesh", "project": "", "author_num": 12, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3a37abdeefe1dab1b30f7c5c7e581b93-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Meta-Gradient Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11249", "id": "11249", "author_site": "Zhongwen Xu, Hado van Hasselt, David Silver", "author": "Zhongwen Xu; Hado P van Hasselt; David Silver", "abstract": "The goal of reinforcement learning algorithms is to estimate and/or optimise\nthe value function. However, unlike supervised learning, no teacher or oracle is\navailable to provide the true value function. Instead, the majority of reinforcement\nlearning algorithms estimate and/or optimise a proxy for the value function. This\nproxy is typically based on a sampled and bootstrapped approximation to the true\nvalue function, known as a return. The particular choice of return is one of the\nchief components determining the nature of the algorithm: the rate at which future\nrewards are discounted; when and how values should be bootstrapped; or even the\nnature of the rewards themselves. It is well-known that these decisions are crucial\nto the overall success of RL algorithms. We discuss a gradient-based meta-learning\nalgorithm that is able to adapt the nature of the return, online, whilst interacting\nand learning from the environment. When applied to 57 games on the Atari 2600\nenvironment over 200 million frames, our algorithm achieved a new state-of-the-art\nperformance.", "bibtex": "@inproceedings{NEURIPS2018_2715518c,\n author = {Xu, Zhongwen and van Hasselt, Hado P and Silver, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Meta-Gradient Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2715518c875999308842e3455eda2fe3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2715518c875999308842e3455eda2fe3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2715518c875999308842e3455eda2fe3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2715518c875999308842e3455eda2fe3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2715518c875999308842e3455eda2fe3-Reviews.html", "metareview": "", "pdf_size": 1111580, "gs_citation": 396, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1232351434867845885&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2715518c875999308842e3455eda2fe3-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Meta-Learning MCMC Proposals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11411", "id": "11411", "author_site": "Tongzhou Wang, YI WU, Dave Moore, Stuart Russell", "author": "Tongzhou Wang; YI WU; Dave Moore; Stuart Russell", "abstract": "Effective implementations of sampling-based probabilistic inference often require manually constructed, model-specific proposals. Inspired by recent progress in meta-learning for training learning agents that can generalize to unseen environments, we propose a meta-learning approach to building effective and generalizable MCMC proposals. We parametrize the proposal as a neural network to provide fast approximations to block Gibbs conditionals. The learned neural proposals generalize to occurrences of common structural motifs across different models, allowing for the construction of a library of learned inference primitives that can accelerate inference on unseen models with no model-specific training required. We explore several applications including open-universe Gaussian mixture models, in which our learned proposals outperform a hand-tuned sampler, and a real-world named entity recognition task, in which our sampler yields higher final F1 scores than classical single-site Gibbs sampling.", "bibtex": "@inproceedings{NEURIPS2018_584b98aa,\n author = {Wang, Tongzhou and WU, YI and Moore, Dave and Russell, Stuart J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Meta-Learning MCMC Proposals},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/584b98aac2dddf59ee2cf19ca4ccb75e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/584b98aac2dddf59ee2cf19ca4ccb75e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/584b98aac2dddf59ee2cf19ca4ccb75e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/584b98aac2dddf59ee2cf19ca4ccb75e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/584b98aac2dddf59ee2cf19ca4ccb75e-Reviews.html", "metareview": "", "pdf_size": 1333597, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8605609970928911253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Facebook AI Research + University of California, Berkeley; University of California, Berkeley; Google + University of California, Berkeley; University of California, Berkeley", "aff_domain": "gmail.com;gmail.com;gmail.com;cs.berkeley.edu", "email": "gmail.com;gmail.com;gmail.com;cs.berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/584b98aac2dddf59ee2cf19ca4ccb75e-Abstract.html", "aff_unique_index": "0+1;1;2+1;1", "aff_unique_norm": "Meta;University of California, Berkeley;Google", "aff_unique_dep": "Facebook AI Research;;Google", "aff_unique_url": "https://research.facebook.com;https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "FAIR;UC Berkeley;Google", "aff_campus_unique_index": "1;1;2+1;1", "aff_campus_unique": ";Berkeley;Mountain View", "aff_country_unique_index": "0+0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Meta-Reinforcement Learning of Structured Exploration Strategies", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11518", "id": "11518", "author_site": "Abhishek Gupta, Russell Mendonca, YuXuan Liu, Pieter Abbeel, Sergey Levine", "author": "Abhishek Gupta; Russell Mendonca; YuXuan Liu; Pieter Abbeel; Sergey Levine", "abstract": "Exploration is a fundamental challenge in reinforcement learning (RL). Many\ncurrent exploration methods for deep RL use task-agnostic objectives, such as\ninformation gain or bonuses based on state visitation. However, many practical\napplications of RL involve learning more than a single task, and prior tasks can be\nused to inform how exploration should be performed in new tasks. In this work, we\nstudy how prior tasks can inform an agent about how to explore effectively in new\nsituations. We introduce a novel gradient-based fast adaptation algorithm \u2013 model\nagnostic exploration with structured noise (MAESN) \u2013 to learn exploration strategies\nfrom prior experience. The prior experience is used both to initialize a policy\nand to acquire a latent exploration space that can inject structured stochasticity into\na policy, producing exploration strategies that are informed by prior knowledge\nand are more effective than random action-space noise. We show that MAESN is\nmore effective at learning exploration strategies when compared to prior meta-RL\nmethods, RL without learned exploration strategies, and task-agnostic exploration\nmethods. 
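The MAESN idea of injecting structured stochasticity through a latent variable can be sketched in a few lines: a per-task latent z is sampled once and held fixed for a whole episode, so exploration noise is temporally coherent rather than i.i.d. per action. The toy below is only an illustration of that mechanism, not the paper's implementation; all dimensions, weights, and names (`act`, `W_z`, `mu`, `log_sigma`) are hypothetical stand-ins.

```python
import numpy as np

rng = np.random.default_rng(0)
obs_dim, act_dim, latent_dim = 4, 2, 3  # hypothetical toy sizes

# Policy weights (meta-trained in MAESN; random stand-ins here).
W_obs = 0.1 * rng.standard_normal((act_dim, obs_dim))
W_z = 0.1 * rng.standard_normal((act_dim, latent_dim))

# Per-task parameters of the learned latent exploration distribution.
mu, log_sigma = np.zeros(latent_dim), np.zeros(latent_dim)

def act(obs, z):
    # Deterministic policy given the observation and the latent variable.
    return np.tanh(W_obs @ obs + W_z @ z)

# One rollout: z is sampled once and kept fixed, so the injected
# stochasticity is "structured" rather than fresh action noise per step.
z = mu + np.exp(log_sigma) * rng.standard_normal(latent_dim)
obs = rng.standard_normal(obs_dim)
for t in range(5):
    a = act(obs, z)
    obs = rng.standard_normal(obs_dim)  # stand-in for an environment step
```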
We evaluate our method on a variety of simulated tasks: locomotion with\na wheeled robot, locomotion with a quadrupedal walker, and object manipulation.", "bibtex": "@inproceedings{NEURIPS2018_4de75424,\n author = {Gupta, Abhishek and Mendonca, Russell and Liu, YuXuan and Abbeel, Pieter and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Meta-Reinforcement Learning of Structured Exploration Strategies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4de754248c196c85ee4fbdcee89179bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4de754248c196c85ee4fbdcee89179bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4de754248c196c85ee4fbdcee89179bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4de754248c196c85ee4fbdcee89179bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4de754248c196c85ee4fbdcee89179bd-Reviews.html", "metareview": "", "pdf_size": 2498781, "gs_citation": 471, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8837867565687609361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley", "aff_domain": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "email": "eecs.berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4de754248c196c85ee4fbdcee89179bd-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "MetaAnchor: Learning to Detect Objects with Customized Anchors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11057", "id": "11057", "author_site": "Tong Yang, Xiangyu Zhang, Zeming Li, Wenqiang Zhang, Jian Sun", "author": "Tong Yang; Xiangyu Zhang; Zeming Li; Wenqiang Zhang; Jian Sun", "abstract": "We propose a novel and flexible anchor mechanism named MetaAnchor for object detection frameworks. Unlike many previous detectors, which model anchors in a predefined manner, in MetaAnchor anchor functions can be dynamically generated from arbitrary customized prior boxes. Taking advantage of weight prediction, MetaAnchor is able to work with most of the anchor-based object detection systems such as RetinaNet. 
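The weight-prediction idea can be sketched compactly: a small network maps an anchor description (box width and height) to the parameters of that anchor's prediction function, so anchors need not be fixed in advance. This numpy toy is illustrative only; the layer sizes and names (`anchor_function_params`, `objectness`) are hypothetical, not the paper's architecture.

```python
import numpy as np

rng = np.random.default_rng(0)
feat_dim = 16  # hypothetical feature dimension of a detection head

# Tiny weight-prediction network G: maps b = (log w, log h) to the
# parameters of that anchor's scoring function, instead of storing one
# fixed set of weights per predefined anchor.
U = 0.1 * rng.standard_normal((2 * feat_dim, 2))         # hidden layer
V = 0.1 * rng.standard_normal((feat_dim + 1, 2 * feat_dim))  # weights+bias

def anchor_function_params(box_wh):
    b = np.log(np.asarray(box_wh, dtype=float))
    h = np.maximum(U @ b, 0.0)            # ReLU
    theta = V @ h
    return theta[:-1], theta[-1]          # (weights, bias) of a linear scorer

def objectness(feature, box_wh):
    w, bias = anchor_function_params(box_wh)
    return 1.0 / (1.0 + np.exp(-(w @ feature + bias)))  # sigmoid score

# The same feature map cell can be scored under arbitrary customized
# prior boxes, including boxes never seen during training.
feature = rng.standard_normal(feat_dim)
for wh in [(32, 32), (64, 128), (96, 48)]:
    print(wh, objectness(feature, wh))
```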
Compared with the predefined anchor scheme, we empirically find that MetaAnchor is more robust to anchor settings and bounding box distributions; in addition, it also shows potential on the transfer task. Our experiments on the COCO detection task show that MetaAnchor consistently outperforms its counterparts in various scenarios.", "bibtex": "@inproceedings{NEURIPS2018_69adc1e1,\n author = {Yang, Tong and Zhang, Xiangyu and Li, Zeming and Zhang, Wenqiang and Sun, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MetaAnchor: Learning to Detect Objects with Customized Anchors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/69adc1e107f7f7d035d7baf04342e1ca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/69adc1e107f7f7d035d7baf04342e1ca-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/69adc1e107f7f7d035d7baf04342e1ca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/69adc1e107f7f7d035d7baf04342e1ca-Reviews.html", "metareview": "", "pdf_size": 1511158, "gs_citation": 192, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2066204067919441204&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Megvii Inc (Face++) + Fudan University; Megvii Inc (Face++) + Fudan University; Megvii Inc (Face++) + Fudan University; Fudan University; Megvii Inc (Face++) + Fudan University", "aff_domain": "megvii.com;megvii.com;megvii.com;fudan.edu.cn;megvii.com", "email": "megvii.com;megvii.com;megvii.com;fudan.edu.cn;megvii.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/69adc1e107f7f7d035d7baf04342e1ca-Abstract.html", "aff_unique_index": "0+2;0+2;0+2;2;0+2", "aff_unique_norm": "Megvii Inc;;Fudan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.megvii.com;;https://www.fudan.edu.cn", "aff_unique_abbr": "Megvii;;Fudan", "aff_campus_unique_index": ";;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0;0;0+0", "aff_country_unique": "China;" }, { "title": "MetaGAN: An Adversarial Approach to Few-Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11246", "id": "11246", "author_site": "Ruixiang ZHANG, Tong Che, Zoubin Ghahramani, Yoshua Bengio, Yangqiu Song", "author": "Ruixiang ZHANG; Tong Che; Zoubin Ghahramani; Yoshua Bengio; Yangqiu Song", "abstract": "In this paper, we propose a conceptually simple and general framework called MetaGAN for few-shot learning problems. Most state-of-the-art few-shot classification models can be integrated with MetaGAN in a principled and straightforward way. By introducing an adversarial generator conditioned on tasks, we augment vanilla few-shot classification models with the ability to discriminate between real and fake data. We argue that this GAN-based approach can help few-shot classifiers to learn sharper decision boundaries, which could generalize better. We show that with our MetaGAN framework, we can extend supervised few-shot learning models to naturally cope with unsupervised data. Different from previous work in semi-supervised few-shot learning, our algorithms can deal with semi-supervision at both sample-level and task-level. 
We give theoretical justifications of the strength of MetaGAN, and validate the effectiveness of MetaGAN on challenging few-shot image classification benchmarks.", "bibtex": "@inproceedings{NEURIPS2018_4e4e53aa,\n author = {ZHANG, Ruixiang and Che, Tong and Ghahramani, Zoubin and Bengio, Yoshua and Song, Yangqiu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MetaGAN: An Adversarial Approach to Few-Shot Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e4e53aa080247bc31d0eb4e7aeb07a0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e4e53aa080247bc31d0eb4e7aeb07a0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e4e53aa080247bc31d0eb4e7aeb07a0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e4e53aa080247bc31d0eb4e7aeb07a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e4e53aa080247bc31d0eb4e7aeb07a0-Reviews.html", "metareview": "", "pdf_size": 325352, "gs_citation": 724, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3532444090702481918&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "MILA, Universit\u00e9 de Montr\u00e9al + HKUST; MILA, Universit\u00e9 de Montr\u00e9al; University of Cambridge; MILA, Universit\u00e9 de Montr\u00e9al, CIFAR Senior Fellow; HKUST", "aff_domain": "gmail.com;gmail.com;cam.ac.uk;mila.quebec;cse.ust.hk", "email": "gmail.com;gmail.com;cam.ac.uk;mila.quebec;cse.ust.hk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e4e53aa080247bc31d0eb4e7aeb07a0-Abstract.html", "aff_unique_index": "0+1;0;2;0;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Hong Kong University of Science and Technology;University of Cambridge", "aff_unique_dep": "MILA;;", "aff_unique_url": "https://www.umontreal.ca;https://www.ust.hk;https://www.cam.ac.uk", "aff_unique_abbr": "UdeM;HKUST;Cambridge", "aff_campus_unique_index": "0+1;0;2;0;1", "aff_campus_unique": "Montr\u00e9al;Hong Kong SAR;Cambridge", "aff_country_unique_index": "0+1;0;2;0;1", "aff_country_unique": "Canada;China;United Kingdom" }, { "title": "MetaReg: Towards Domain Generalization using Meta-Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11120", "id": "11120", "author_site": "Yogesh Balaji, Swami Sankaranarayanan, Rama Chellappa", "author": "Yogesh Balaji; Swami Sankaranarayanan; Rama Chellappa", "abstract": "Training models that generalize to new domains at test time is a problem of fundamental importance in machine learning. In this work, we encode this notion of domain generalization using a novel regularization function. We pose the problem of finding such a regularization function in a Learning to Learn (or meta-learning) framework. The objective of domain generalization is explicitly modeled by learning a regularizer that makes a model trained on one domain perform well on another domain. 
Experimental validations on computer vision and natural language datasets indicate that our method can learn regularizers that achieve good cross-domain generalization.", "bibtex": "@inproceedings{NEURIPS2018_647bba34,\n author = {Balaji, Yogesh and Sankaranarayanan, Swami and Chellappa, Rama},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MetaReg: Towards Domain Generalization using Meta-Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/647bba344396e7c8170902bcf2e15551-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/647bba344396e7c8170902bcf2e15551-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/647bba344396e7c8170902bcf2e15551-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/647bba344396e7c8170902bcf2e15551-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/647bba344396e7c8170902bcf2e15551-Reviews.html", "metareview": "", "pdf_size": 468258, "gs_citation": 859, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14074977941346308540&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Maryland; Butterfly Network Inc. + University of Maryland; Department of Electrical and Computer Engineering, University of Maryland", "aff_domain": "cs.umd.edu;butterflynetinc.com;umiacs.umd.edu", "email": "cs.umd.edu;butterflynetinc.com;umiacs.umd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/647bba344396e7c8170902bcf2e15551-Abstract.html", "aff_unique_index": "0;1+0;0", "aff_unique_norm": "University of Maryland;Butterfly Network Inc.", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www/umd.edu;https://www.butterflynetwork.com", "aff_unique_abbr": "UMD;Butterfly Network", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0", "aff_country_unique": "United States" }, { "title": "Metric on Nonlinear Dynamical Systems with Perron-Frobenius Operators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11292", "id": "11292", "author_site": "Isao Ishikawa, Keisuke Fujii, Masahiro Ikeda, Yuka Hashimoto, Yoshinobu Kawahara", "author": "Isao Ishikawa; Keisuke Fujii; Masahiro Ikeda; Yuka Hashimoto; Yoshinobu Kawahara", "abstract": "The development of a metric for structural data is a long-term problem in pattern recognition and machine learning. In this paper, we develop a general metric for comparing nonlinear dynamical systems that is defined with Perron-Frobenius operators in reproducing kernel Hilbert spaces. Our metric includes the existing fundamental metrics for dynamical systems, which are basically defined with principal angles between some appropriately-chosen subspaces, as its special cases. We also describe the estimation of our metric from finite data. 
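For intuition, the classical principal-angle construction that the proposed metric generalizes can be computed directly from orthonormal bases of two subspaces. The scipy sketch below uses random hypothetical subspaces, and the "Martin-type" quantity is the standard subspace distance of that name, not the paper's operator-theoretic metric.

```python
import numpy as np
from scipy.linalg import subspace_angles

rng = np.random.default_rng(0)

# Orthonormal bases of two 3-dimensional subspaces of R^6, standing in
# for the "appropriately-chosen subspaces" associated with two systems.
A = np.linalg.qr(rng.standard_normal((6, 3)))[0]
B = np.linalg.qr(rng.standard_normal((6, 3)))[0]

theta = subspace_angles(A, B)                  # principal angles
chordal = np.linalg.norm(np.sin(theta))        # a classical subspace distance
martin = -np.log(np.prod(np.cos(theta)) ** 2)  # Martin-type distance
print(theta, chordal, martin)
```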
We empirically illustrate our metric with an example of rotation dynamics in the unit disk in the complex plane, and evaluate the performance with real-world time-series data.", "bibtex": "@inproceedings{NEURIPS2018_fa1e9c96,\n author = {Ishikawa, Isao and Fujii, Keisuke and Ikeda, Masahiro and Hashimoto, Yuka and Kawahara, Yoshinobu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Metric on Nonlinear Dynamical Systems with Perron-Frobenius Operators},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fa1e9c965314ccd7810fb5ea838303e5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fa1e9c965314ccd7810fb5ea838303e5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fa1e9c965314ccd7810fb5ea838303e5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fa1e9c965314ccd7810fb5ea838303e5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fa1e9c965314ccd7810fb5ea838303e5-Reviews.html", "metareview": "", "pdf_size": 1195060, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9736849801126744369&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "RIKEN Center for Advanced Intelligence Project + School of Fundamental Science and Technology, Keio University + The Institute of Scientific and Industrial Research, Osaka University; RIKEN Center for Advanced Intelligence Project; RIKEN Center for Advanced Intelligence Project + School of Fundamental Science and Technology, Keio University; RIKEN Center for Advanced Intelligence Project + School of Fundamental Science and Technology, Keio University; RIKEN Center for Advanced Intelligence Project + The Institute of Scientific and Industrial Research, Osaka University", "aff_domain": "riken.jp;riken.jp;riken.jp;keio.jp;sanken.osaka-u.ac.jp", "email": "riken.jp;riken.jp;riken.jp;keio.jp;sanken.osaka-u.ac.jp", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fa1e9c965314ccd7810fb5ea838303e5-Abstract.html", "aff_unique_index": "0+1+2;0;0+1;0+1;0+2", "aff_unique_norm": "RIKEN;Keio University;Osaka University", "aff_unique_dep": "Center for Advanced Intelligence Project;School of Fundamental Science and Technology;The Institute of Scientific and Industrial Research", "aff_unique_url": "https://www.riken.jp/en/;https://www.keio.ac.jp;https://www.osaka-u.ac.jp", "aff_unique_abbr": "RIKEN;Keio;Osaka U", "aff_campus_unique_index": "1;;;1", "aff_campus_unique": ";Osaka", "aff_country_unique_index": "0+0+0;0;0+0;0+0;0+0", "aff_country_unique": "Japan" }, { "title": "MiME: Multilevel Medical Embedding of Electronic Health Records for Predictive Healthcare", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11448", "id": "11448", "author_site": "Edward Choi, Cao Xiao, Walter Stewart, Jimeng Sun", "author": "Edward Choi; Cao Xiao; Walter Stewart; Jimeng Sun", "abstract": "Deep learning models exhibit state-of-the-art performance for many predictive healthcare tasks using electronic health records (EHR) data, but these models typically require a volume of training data that exceeds the capacity of most healthcare systems.\nExternal resources such as medical ontologies are used to bridge the data volume constraint, 
but this approach is often not directly applicable or useful because of inconsistencies with terminology.\nTo solve the data insufficiency challenge, we leverage the inherent multilevel structure of EHR data and, in particular, the encoded relationships among medical codes.\nWe propose Multilevel Medical Embedding (MiME), which learns the multilevel embedding of EHR data while jointly performing auxiliary prediction tasks that rely on this inherent EHR structure without the need for external labels. \nWe conducted two prediction tasks, heart failure prediction and sequential disease prediction, where MiME outperformed baseline methods in diverse evaluation settings.\nIn particular, MiME consistently outperformed all baselines when predicting heart failure on datasets of different volumes, especially demonstrating the greatest performance improvement (15% relative gain in PR-AUC over the best baseline) on the smallest dataset, which highlights its ability to effectively model the multilevel structure of EHR data.", "bibtex": "@inproceedings{NEURIPS2018_934b5358,\n author = {Choi, Edward and Xiao, Cao and Stewart, Walter and Sun, Jimeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MiME: Multilevel Medical Embedding of Electronic Health Records for Predictive Healthcare},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/934b535800b1cba8f96a5d72f72f1611-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/934b535800b1cba8f96a5d72f72f1611-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/934b535800b1cba8f96a5d72f72f1611-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/934b535800b1cba8f96a5d72f72f1611-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/934b535800b1cba8f96a5d72f72f1611-Reviews.html", "metareview": "", "pdf_size": 553229, "gs_citation": 314, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9778014794664384350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Google Brain + Georgia Institute of Technology; IBM Research; HINT Consultants + Sutter Health; Georgia Institute of Technology", "aff_domain": "google.com;us.ibm.com;yahoo.com;cc.gatech.edu", "email": "google.com;us.ibm.com;yahoo.com;cc.gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/934b535800b1cba8f96a5d72f72f1611-Abstract.html", "aff_unique_index": "0+1;2;3+4;1", "aff_unique_norm": "Google;Georgia Institute of Technology;IBM;HINT Consultants;Sutter Health", "aff_unique_dep": "Google Brain;;IBM Research;;", "aff_unique_url": "https://brain.google.com;https://www.gatech.edu;https://www.ibm.com/research;;https://www.sutterhealth.org", "aff_unique_abbr": "Google Brain;Georgia Tech;IBM;;", "aff_campus_unique_index": "0;", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Middle-Out Decoding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11539", "id": "11539", "author_site": "Shikib Mehri, Leonid Sigal", "author": "Shikib Mehri; Leonid Sigal", "abstract": "Despite being virtually ubiquitous, sequence-to-sequence models are challenged by their lack of diversity and inability 
to be externally controlled. In this paper, we speculate that a fundamental shortcoming of sequence generation models is that the decoding is done strictly from left-to-right, meaning that output values generated earlier have a profound effect on those generated later. To address this issue, we propose a novel middle-out decoder architecture that begins from an initial middle-word and simultaneously expands the sequence in both directions. To facilitate information flow and maintain consistent decoding, we introduce a dual self-attention mechanism that allows us to model complex dependencies between the outputs. We illustrate the performance of our model on the task of video captioning, as well as a synthetic sequence de-noising task. Our middle-out decoder achieves significant improvements on de-noising and competitive performance in the task of video captioning, while quantifiably improving the caption diversity. Furthermore, we perform a qualitative analysis that demonstrates our ability to effectively control the generation process of our decoder.", "bibtex": "@inproceedings{NEURIPS2018_0c215f19,\n author = {Mehri, Shikib and Sigal, Leonid},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Middle-Out Decoding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0c215f194276000be6a6df6528067151-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0c215f194276000be6a6df6528067151-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0c215f194276000be6a6df6528067151-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0c215f194276000be6a6df6528067151-Reviews.html", "metareview": "", "pdf_size": 802899, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1782813335949984098&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, University of British Columbia; Department of Computer Science, University of British Columbia", "aff_domain": "cs.cmu.edu;cs.ubc.ca", "email": "cs.cmu.edu;cs.ubc.ca", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0c215f194276000be6a6df6528067151-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Vancouver", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Minimax Estimation of Neural Net Distance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11383", "id": "11383", "author_site": "Kaiyi Ji, Yingbin Liang", "author": "Kaiyi Ji; Yingbin Liang", "abstract": "An important class of distance metrics proposed for training generative adversarial networks (GANs) is the integral probability metric (IPM), in which the neural net distance captures the practical GAN training via two neural networks. This paper investigates the minimax estimation problem of the neural net distance based on samples drawn from the distributions. 
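As a concrete special case of the quantity being estimated: for a norm-bounded linear function class, the empirical IPM has a closed form, which makes the sample-size behaviour of the estimation error easy to see. The sketch below (with a hypothetical `linear_ipm` helper and toy Gaussian data) illustrates the estimand, not the paper's proofs.

```python
import numpy as np

rng = np.random.default_rng(0)

# For the linear class {x -> <w, x> : ||w||_2 <= 1}, the empirical IPM
#   sup_f [ mean_X f - mean_Y f ]
# equals the norm of the difference of sample means (Cauchy-Schwarz).
# The neural net distance replaces this class with norm-bounded networks.
def linear_ipm(X, Y):
    return np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0))

X = rng.normal(loc=0.0, size=(500, 3))
Y = rng.normal(loc=0.3, size=(500, 3))
print("empirical linear IPM:", linear_ipm(X, Y))

# On two samples from the *same* distribution, the estimation error
# shrinks roughly like O(1/sqrt(n)) -- the sample-size order that
# matching lower and upper bounds are meant to capture.
for n in (50, 500, 5000):
    print(n, linear_ipm(rng.normal(size=(n, 3)), rng.normal(size=(n, 3))))
```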
We develop the first known minimax lower bound on the estimation error of the neural net distance, and an upper bound tighter than an existing bound on the estimator error for the empirical neural net distance. Our lower and upper bounds match not only in the order of the sample size but also in terms of the norm of the parameter matrices of neural networks, which justifies the empirical neural net distance as a good approximation of the true neural net distance for training GANs in practice.", "bibtex": "@inproceedings{NEURIPS2018_dea9ddb2,\n author = {Ji, Kaiyi and Liang, Yingbin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimax Estimation of Neural Net Distance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dea9ddb25cbf2352cf4dec30222a02a5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dea9ddb25cbf2352cf4dec30222a02a5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dea9ddb25cbf2352cf4dec30222a02a5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dea9ddb25cbf2352cf4dec30222a02a5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dea9ddb25cbf2352cf4dec30222a02a5-Reviews.html", "metareview": "", "pdf_size": 324748, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14915021996531220347&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of ECE, The Ohio State University; Department of ECE, The Ohio State University", "aff_domain": "osu.edu;osu.edu", "email": "osu.edu;osu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dea9ddb25cbf2352cf4dec30222a02a5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Minimax Statistical Learning with Wasserstein distances", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11276", "id": "11276", "author_site": "Jaeho Lee, Maxim Raginsky", "author": "Jaeho Lee; Maxim Raginsky", "abstract": "As opposed to standard empirical risk minimization (ERM), distributionally robust optimization aims to minimize the worst-case risk over a larger ambiguity set containing the original empirical distribution of the training data. In this work, we describe a minimax framework for statistical learning with ambiguity sets given by balls in Wasserstein space. In particular, we prove generalization bounds that involve the covering number properties of the original ERM problem. As an illustrative example, we provide generalization guarantees for transport-based domain adaptation problems where the Wasserstein distance between the source and target domain distributions can be reliably estimated from unlabeled samples.", "bibtex": "@inproceedings{NEURIPS2018_ea8fcd92,\n author = {Lee, Jaeho and Raginsky, Maxim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimax Statistical Learning with Wasserstein distances},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ea8fcd92d59581717e06eb187f10666d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ea8fcd92d59581717e06eb187f10666d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ea8fcd92d59581717e06eb187f10666d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ea8fcd92d59581717e06eb187f10666d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ea8fcd92d59581717e06eb187f10666d-Reviews.html", "metareview": "", "pdf_size": 550657, "gs_citation": 213, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9707381454733877969&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Electrical and Computer Engineering and Coordinated Science Laboratory, University of Illinois, Urbana, IL 61801, USA; Department of Electrical and Computer Engineering and Coordinated Science Laboratory, University of Illinois, Urbana, IL 61801, USA", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ea8fcd92d59581717e06eb187f10666d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Mirrored Langevin Dynamics", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11294", "id": "11294", "author_site": "Ya-Ping Hsieh, Ali Kavis, Paul Rolland, Volkan Cevher", "author": "Ya-Ping Hsieh; Ali Kavis; Paul Rolland; Volkan Cevher", "abstract": "We consider the problem of sampling from constrained distributions, which has posed significant challenges to both non-asymptotic analysis and algorithmic design. We propose a unified framework, which is inspired by the classical mirror descent, to derive novel first-order sampling schemes. We prove that, for a general target distribution with strongly convex potential, our framework implies the existence of a first-order algorithm achieving O~(\\epsilon^{-2}d) convergence, suggesting that the state-of-the-art O~(\\epsilon^{-6}d^5) can be vastly improved. With the important Latent Dirichlet Allocation (LDA) application in mind, we specialize our algorithm to sample from Dirichlet posteriors, and derive the first non-asymptotic O~(\\epsilon^{-2}d^2) rate for first-order sampling. We further extend our framework to the mini-batch setting and prove convergence rates when only stochastic gradients are available. Finally, we report promising experimental results for LDA on real datasets.", "bibtex": "@inproceedings{NEURIPS2018_6490791e,\n author = {Hsieh, Ya-Ping and Kavis, Ali and Rolland, Paul and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mirrored Langevin Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6490791e7abf6b29a381288cc23a8223-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6490791e7abf6b29a381288cc23a8223-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6490791e7abf6b29a381288cc23a8223-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6490791e7abf6b29a381288cc23a8223-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6490791e7abf6b29a381288cc23a8223-Reviews.html", "metareview": "", "pdf_size": 823647, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=214941467532559052&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Laboratory for Information and Inference Systems (LIONS), EPFL, Lausanne, Switzerland; Laboratory for Information and Inference Systems (LIONS), EPFL, Lausanne, Switzerland; Laboratory for Information and Inference Systems (LIONS), EPFL, Lausanne, Switzerland; Laboratory for Information and Inference Systems (LIONS), EPFL, Lausanne, Switzerland", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6490791e7abf6b29a381288cc23a8223-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "Laboratory for Information and Inference Systems (LIONS)", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "MixLasso: Generalized Mixed Regression via Convex Atomic-Norm Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12027", "id": "12027", "author_site": "Ian En-Hsu Yen, Wei-Cheng Lee, Kai Zhong, Sung-En Chang, Pradeep Ravikumar, Shou-De Lin", "author": "Ian En-Hsu Yen; Wei-Cheng Lee; Kai Zhong; Sung-En Chang; Pradeep K Ravikumar; Shou-De Lin", "abstract": "We consider a generalization of mixed regression where the response is an additive combination of several mixture components. Standard mixed regression is a special case where each response is generated from exactly one component. Typical approaches to the mixture regression problem employ local search methods such as Expectation Maximization (EM) that are prone to spurious local optima. On the other hand, a number of recent theoretically-motivated \\emph{Tensor-based methods} either have high sample complexity, or require the knowledge of the input distribution, which is not available in most practical situations. In this work, we study a novel convex estimator \\emph{MixLasso} for the estimation of generalized mixed regression, based on an atomic norm specifically constructed to regularize the number of mixture components. Our algorithm gives a risk bound that trades off between prediction accuracy and model sparsity without imposing stringent assumptions on the input/output distribution, and can be easily adapted to the case of non-linear functions. 
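To make the contrast concrete, here is a minimal numpy sketch of the EM baseline for a mixture of linear regressions, the local-search approach the abstract argues against; all sizes are hypothetical toys, and this is an illustration of the baseline, not of MixLasso itself.

```python
import numpy as np

rng = np.random.default_rng(0)
n, d, K = 400, 3, 2  # hypothetical toy sizes

# Toy data from standard mixed regression: each response comes from
# exactly one of K linear components.
W_true = rng.standard_normal((K, d))
X = rng.standard_normal((n, d))
z = rng.integers(K, size=n)
y = np.einsum('ij,ij->i', X, W_true[z]) + 0.1 * rng.standard_normal(n)

# EM: alternate soft assignments (E-step) and weighted least squares
# (M-step); different random initializations can land in spurious optima.
W = rng.standard_normal((K, d))
pi, sigma2 = np.full(K, 1.0 / K), 1.0
for _ in range(50):
    resid = y[:, None] - X @ W.T                     # (n, K) residuals
    logp = np.log(pi) - 0.5 * resid**2 / sigma2      # shared sigma2 cancels
    logp -= logp.max(axis=1, keepdims=True)
    r = np.exp(logp)
    r /= r.sum(axis=1, keepdims=True)                # responsibilities
    for k in range(K):
        Xw = X * r[:, k:k + 1]                       # row-weighted design
        W[k] = np.linalg.solve(Xw.T @ X + 1e-6 * np.eye(d), Xw.T @ y)
    pi = r.mean(axis=0)
    sigma2 = np.sum(r * (y[:, None] - X @ W.T) ** 2) / n

print("estimated (up to label permutation):\n", W.round(2))
print("true:\n", W_true.round(2))
```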
In our numerical experiments on mixtures of linear as well as nonlinear regressions, the proposed method yields high-quality solutions in a wider range of settings than existing approaches.", "bibtex": "@inproceedings{NEURIPS2018_09779bb7,\n author = {Yen, Ian En-Hsu and Lee, Wei-Cheng and Zhong, Kai and Chang, Sung-En and Ravikumar, Pradeep K and Lin, Shou-De},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MixLasso: Generalized Mixed Regression via Convex Atomic-Norm Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/09779bb7930c8a0a44360e12b538ae3c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/09779bb7930c8a0a44360e12b538ae3c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/09779bb7930c8a0a44360e12b538ae3c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/09779bb7930c8a0a44360e12b538ae3c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/09779bb7930c8a0a44360e12b538ae3c-Reviews.html", "metareview": "", "pdf_size": 834777, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=247926894280471282&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Carnegie Mellon University + Snap Inc.; National Taiwan University; National Taiwan University; Amazon Inc.; Carnegie Mellon University; National Taiwan University", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/09779bb7930c8a0a44360e12b538ae3c-Abstract.html", "aff_unique_index": "0;1;1;2;0;1", "aff_unique_norm": "Carnegie Mellon University;National Taiwan University;Amazon", "aff_unique_dep": ";;Amazon", "aff_unique_url": "https://www.cmu.edu;https://www.ntu.edu.tw;https://www.amazon.com", "aff_unique_abbr": "CMU;NTU;Amazon", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "title": "Mixture Matrix Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11230", "id": "11230", "author": "Daniel Pimentel-Alarcon", "abstract": "Completing a data matrix X has become a ubiquitous problem in modern data science, with motivations in recommender systems, computer vision, and network inference, to name a few. One typical assumption is that X is low-rank. A more general model assumes that each column of X corresponds to one of several low-rank matrices. This paper generalizes these models to what we call mixture matrix completion (MMC): the case where each entry of X corresponds to one of several low-rank matrices. MMC is a more accurate model for recommender systems, and brings more flexibility to other completion and clustering problems. We make four fundamental contributions about this new model. First, we show that MMC is theoretically possible (well-posed). Second, we give its precise information-theoretic identifiability conditions. Third, we derive the sample complexity of MMC. 
Finally, we give a practical algorithm for MMC with performance comparable to the state-of-the-art for simpler related problems, both on synthetic and real data.", "bibtex": "@inproceedings{NEURIPS2018_20d135f0,\n author = {Pimentel-Alarcon, Daniel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mixture Matrix Completion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/20d135f0f28185b84a4cf7aa51f29500-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/20d135f0f28185b84a4cf7aa51f29500-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/20d135f0f28185b84a4cf7aa51f29500-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/20d135f0f28185b84a4cf7aa51f29500-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/20d135f0f28185b84a4cf7aa51f29500-Reviews.html", "metareview": "", "pdf_size": 534238, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11567055361745872727&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Georgia State University", "aff_domain": "gsu.edu", "email": "gsu.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/20d135f0f28185b84a4cf7aa51f29500-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Georgia State University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.gsu.edu", "aff_unique_abbr": "GSU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Model Agnostic Supervised Local Explanations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11260", "id": "11260", "author_site": "Gregory Plumb, Denali Molitor, Ameet Talwalkar", "author": "Gregory Plumb; Denali Molitor; Ameet S Talwalkar", "abstract": "Model interpretability is an increasingly important component of practical machine learning. Some of the most common forms of interpretability systems are example-based, local, and global explanations. One of the main challenges in interpretability is designing explanation systems that can capture aspects of each of these explanation types, in order to develop a more thorough understanding of the model. We address this challenge in a novel model called MAPLE that uses local linear modeling techniques along with a dual interpretation of random forests (both as a supervised neighborhood approach and as a feature selection method). MAPLE has two fundamental advantages over existing interpretability systems. First, while it is effective as a black-box explanation system, MAPLE itself is a highly accurate predictive model that provides faithful self explanations, and thus sidesteps the typical accuracy-interpretability trade-off. Specifically, we demonstrate, on several UCI datasets, that MAPLE is at least as accurate as random forests and that it produces more faithful local explanations than LIME, a popular interpretability system. 
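MAPLE's two ingredients, a random forest used as a supervised neighborhood plus local linear modeling, can be sketched roughly as follows with sklearn. This is a simplified toy: it uses plain leaf co-occurrence fractions as local weights, a hypothetical `local_linear_explanation` helper, and omits MAPLE's feature selection and per-leaf weight normalization.

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X = rng.uniform(-2, 2, size=(300, 2))
y = np.sin(X[:, 0]) + 0.5 * X[:, 1] + 0.1 * rng.standard_normal(300)

# The forest defines the supervised neighborhood: training points that
# land in the same leaves as the query get higher local weights.
forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)
train_leaves = forest.apply(X)                 # (n_train, n_trees)

def local_linear_explanation(x_query):
    q_leaves = forest.apply(x_query.reshape(1, -1))[0]
    weights = (train_leaves == q_leaves).mean(axis=1)  # leaf co-occurrence
    # Weighted linear fit: its coefficients are the local explanation,
    # and its prediction is MAPLE-style self-explaining output.
    return Ridge(alpha=1e-3).fit(X, y, sample_weight=weights)

x0 = np.array([0.0, 0.0])
lm = local_linear_explanation(x0)
print("local coefficients:", lm.coef_,
      "local prediction:", lm.predict(x0.reshape(1, -1)))
```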
Second, MAPLE provides both example-based and local explanations and can detect global patterns, which allows it to diagnose limitations in its local explanations.", "bibtex": "@inproceedings{NEURIPS2018_b495ce63,\n author = {Plumb, Gregory and Molitor, Denali and Talwalkar, Ameet S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Model Agnostic Supervised Local Explanations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b495ce63ede0f4efc9eec62cb947c162-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b495ce63ede0f4efc9eec62cb947c162-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b495ce63ede0f4efc9eec62cb947c162-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b495ce63ede0f4efc9eec62cb947c162-Reviews.html", "metareview": "", "pdf_size": 305965, "gs_citation": 281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3090118674779699868&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "CMU; UCLA; CMU", "aff_domain": "andrew.cmu.edu;math.ucla.edu;cmu.edu", "email": "andrew.cmu.edu;math.ucla.edu;cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b495ce63ede0f4efc9eec62cb947c162-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.ucla.edu", "aff_unique_abbr": "CMU;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Model-Agnostic Private Learning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11684", "id": "11684", "author_site": "Raef Bassily, Abhradeep Guha Thakurta, Om Thakkar", "author": "Raef Bassily; Om Thakkar; Abhradeep Guha Thakurta", "abstract": "We design differentially private learning algorithms that are agnostic to the learning model, assuming access to a limited amount of unlabeled public data. First, we give a new differentially private algorithm for answering a sequence of $m$ online classification queries (given by a sequence of $m$ unlabeled public feature vectors) based on a private training set. Our private algorithm follows the paradigm of subsample-and-aggregate, in which any generic non-private learner is trained on disjoint subsets of the private training set, then for each classification query, the votes of the resulting ensemble of classifiers are aggregated in a differentially private fashion. Our private aggregation is based on a novel combination of distance-to-instability framework [Smith & Thakurta 2013] and the sparse-vector technique [Dwork et al. 2009, Hardt & Talwar 2010]. We show that our algorithm makes a conservative use of the privacy budget. In particular, if the underlying non-private learner yields classification error at most $\\alpha\\in (0, 1)$, then our construction answers more queries, by at least a factor of $1/\\alpha$ in some cases, than what is implied by a straightforward application of the advanced composition theorem for differential privacy. 
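The subsample-and-aggregate paradigm itself is easy to sketch: train a generic learner on each disjoint chunk of the private data, then answer a query with a noisy plurality vote. The toy below uses a basic report-noisy-max aggregator and a hypothetical `private_label` helper; it is a simplified stand-in, not the paper's mechanism, which spends the budget more conservatively via distance-to-instability and the sparse-vector technique.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)

# Toy private training data with two classes.
n, d, k = 1000, 5, 10  # k disjoint partitions (hypothetical sizes)
Xp = rng.standard_normal((n, d))
yp = (Xp[:, 0] + 0.3 * rng.standard_normal(n) > 0).astype(int)

# Subsample-and-aggregate: one generic non-private learner per chunk.
parts = np.array_split(rng.permutation(n), k)
teachers = [LogisticRegression().fit(Xp[idx], yp[idx]) for idx in parts]

def private_label(x, eps=1.0):
    """Answer one classification query with a noisy plurality vote
    (basic report-noisy-max with Laplace noise)."""
    votes = np.bincount(
        [t.predict(x.reshape(1, -1))[0] for t in teachers],
        minlength=2).astype(float)
    votes += rng.laplace(scale=2.0 / eps, size=votes.shape)
    return int(np.argmax(votes))

x_public = rng.standard_normal(d)  # an unlabeled public feature vector
print(private_label(x_public))
```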
Next, we apply the knowledge transfer technique to construct a private learner that outputs a classifier, which can be used to answer an unlimited number of queries. In the PAC model, we analyze our construction and prove upper bounds on the sample complexity for both the realizable and the non-realizable cases. As in the non-private setting, our bounds are completely characterized by the VC dimension of the concept class.", "bibtex": "@inproceedings{NEURIPS2018_aa97d584,\n author = {Bassily, Raef and Thakkar, Om and Guha Thakurta, Abhradeep},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Model-Agnostic Private Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aa97d584861474f4097cf13ccb5325da-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aa97d584861474f4097cf13ccb5325da-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aa97d584861474f4097cf13ccb5325da-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aa97d584861474f4097cf13ccb5325da-Reviews.html", "metareview": "", "pdf_size": 325549, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17612827080737183718&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science & Engineering, The Ohio State University; Department of Computer Science, Boston University; Department of Computer Science, University of California Santa Cruz", "aff_domain": "osu.edu;bu.edu;ucsc.edu", "email": "osu.edu;bu.edu;ucsc.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aa97d584861474f4097cf13ccb5325da-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Ohio State University;Boston University;University of California, Santa Cruz", "aff_unique_dep": "Department of Computer Science & Engineering;Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.osu.edu;https://www.bu.edu;https://www.ucsc.edu", "aff_unique_abbr": "OSU;BU;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Model-based targeted dimensionality reduction for neuronal population data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11646", "id": "11646", "author_site": "Mikio Aoi, Jonathan Pillow", "author": "Mikio Aoi; Jonathan W Pillow", "abstract": "Summarizing high-dimensional data using a small number of parameters is a ubiquitous first step in the analysis of neuronal population activity. Recently developed methods use "targeted" approaches that work by identifying multiple, distinct low-dimensional subspaces of activity that capture the population response to individual experimental task variables, such as the value of a presented stimulus or the behavior of the animal. These methods have gained attention because they decompose total neural activity into what are ostensibly different parts of a neuronal computation. However, existing targeted methods have been developed outside of the confines of probabilistic modeling, making some aspects of the procedures ad hoc, or limited in flexibility or interpretability. 
Here we propose a new model-based method for targeted dimensionality reduction based on a probabilistic generative model of the population response data. The low-dimensional structure of our model is expressed as a low-rank factorization of a linear regression model. We perform efficient inference using a combination of expectation maximization and direct maximization of the marginal likelihood. We also develop an efficient method for estimating the dimensionality of each subspace. We show that our approach outperforms alternative methods both in mean squared error of the parameter estimates and in identifying the correct dimensionality of encoding using simulated data. We also show that our method provides more accurate inference of low-dimensional subspaces of activity than a competing algorithm, demixed PCA.", "bibtex": "@inproceedings{NEURIPS2018_8a1ee9f2,\n author = {Aoi, Mikio and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Model-based targeted dimensionality reduction for neuronal population data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8a1ee9f2b7abe6e88d1a479ab6a42c5e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8a1ee9f2b7abe6e88d1a479ab6a42c5e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8a1ee9f2b7abe6e88d1a479ab6a42c5e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8a1ee9f2b7abe6e88d1a479ab6a42c5e-Reviews.html", "metareview": "", "pdf_size": 2908370, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16627221268098771348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544; Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544", "aff_domain": "princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8a1ee9f2b7abe6e88d1a479ab6a42c5e-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Princeton Neuroscience Institute", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Princeton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Modeling Dynamic Missingness of Implicit Feedback for Recommendation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11644", "id": "11644", "author_site": "Menghan Wang, Mingming Gong, Xiaolin Zheng, Kun Zhang", "author": "Menghan Wang; Mingming Gong; Xiaolin Zheng; Kun Zhang", "abstract": "Implicit feedback is widely used in collaborative filtering methods for recommendation. It is well known that implicit feedback contains a large number of values that are \emph{missing not at random} (MNAR); and the missing data is a mixture of negative and unknown feedback, making it difficult to learn users' negative preferences. 
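A tiny simulation makes this MNAR structure concrete (all names and rates below are illustrative): a zero in the observed matrix conflates items the user saw and disliked with items the user never saw.

```python
import numpy as np

rng = np.random.default_rng(0)
preference = rng.random((5, 8)) < 0.4   # latent: would the user like the item?
exposure = rng.random((5, 8)) < 0.5     # latent: did the user ever see it?
observed = preference & exposure        # implicit feedback: 1 = interaction
negatives = ~preference & exposure      # true negative feedback ...
unknowns = ~exposure                    # ... is indistinguishable from this
```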
\nRecent studies modeled \emph{exposure}, a latent missingness variable which indicates whether an item is missing to a user, to give each missing entry a confidence of being negative feedback.\nHowever, these studies use static models and ignore the information in temporal dependencies among items, which seems to be an essential underlying factor in subsequent missingness. To model and exploit the dynamics of missingness, we propose a latent variable named ``\emph{user intent}'' to govern the temporal changes of item missingness, and a hidden Markov model to represent such a process. The resulting framework captures the dynamic item missingness and incorporates it into matrix factorization (MF) for recommendation. We also explore two types of constraints to achieve a more compact and interpretable representation of \emph{user intents}. Experiments on real-world datasets demonstrate the superiority of our method against state-of-the-art recommender systems.", "bibtex": "@inproceedings{NEURIPS2018_8d9766a6,\n author = {Wang, Menghan and Gong, Mingming and Zheng, Xiaolin and Zhang, Kun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Modeling Dynamic Missingness of Implicit Feedback for Recommendation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8d9766a69b764fefc12f56739424d136-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8d9766a69b764fefc12f56739424d136-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8d9766a69b764fefc12f56739424d136-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8d9766a69b764fefc12f56739424d136-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8d9766a69b764fefc12f56739424d136-Reviews.html", "metareview": "", "pdf_size": 504213, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17058357881195039220&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "College of Computer Science, Zhejiang University; Department of Biomedical Informatics, University of Pittsburgh; College of Computer Science, Zhejiang University; Department of Philosophy, Carnegie Mellon University", "aff_domain": "zju.edu.cn;pitt.edu;zju.edu.cn;cmu.edu", "email": "zju.edu.cn;pitt.edu;zju.edu.cn;cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8d9766a69b764fefc12f56739424d136-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Zhejiang University;University of Pittsburgh;Carnegie Mellon University", "aff_unique_dep": "College of Computer Science;Department of Biomedical Informatics;Department of Philosophy", "aff_unique_url": "http://www.zju.edu.cn;https://www.pitt.edu;https://www.cmu.edu", "aff_unique_abbr": "ZJU;Pitt;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Modelling and unsupervised learning of symmetric deformable object categories", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11783", "id": "11783", "author_site": "James Thewlis, Hakan Bilen, Andrea Vedaldi", "author": "James Thewlis; Hakan Bilen; Andrea Vedaldi", "abstract": "We propose a new approach to model and 
learn, without manual supervision, the symmetries of natural objects, such as faces or flowers, given only images as input. It is well known that objects that have a symmetric structure do not usually result in symmetric images due to articulation and perspective effects. This is often tackled by seeking the intrinsic symmetries of the underlying 3D shape, which is very difficult to do when the latter cannot be recovered reliably from data. We show that, if only raw images are given, it is possible to look instead for symmetries in the space of object deformations. We can then learn symmetries from an unstructured collection of images of the object as an extension of the recently-introduced object frame representation, modified so that object symmetries reduce to the obvious symmetry groups in the normalized space. We also show that our formulation provides an explanation of the ambiguities that arise in recovering the pose of symmetric objects from their shape or images and we provide a way of discounting such ambiguities in learning.", "bibtex": "@inproceedings{NEURIPS2018_1d640826,\n author = {Thewlis, James and Bilen, Hakan and Vedaldi, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Modelling and unsupervised learning of symmetric deformable object categories},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1d6408264d31d453d556c60fe7d0459e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1d6408264d31d453d556c60fe7d0459e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1d6408264d31d453d556c60fe7d0459e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1d6408264d31d453d556c60fe7d0459e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1d6408264d31d453d556c60fe7d0459e-Reviews.html", "metareview": "", "pdf_size": 6215925, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3892262452359390253&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Visual Geometry Group, University of Oxford; School of Informatics, University of Edinburgh; Visual Geometry Group, University of Oxford", "aff_domain": "robots.ox.ac.uk;ed.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;ed.ac.uk;robots.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1d6408264d31d453d556c60fe7d0459e-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;University of Edinburgh", "aff_unique_dep": "Visual Geometry Group;School of Informatics", "aff_unique_url": "https://www.ox.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "Oxford;Edinburgh", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Oxford;Edinburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Modelling sparsity, heterogeneity, reciprocity and community structure in temporal interaction data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11244", "id": "11244", "author_site": "Xenia Miscouridou, Francois Caron, Yee Whye Teh", "author": "Xenia Miscouridou; Francois Caron; Yee Whye Teh", "abstract": "We propose a novel class of network models for temporal dyadic interaction data. 
Our objective is to capture important features often observed in social interactions: sparsity, degree heterogeneity, community structure and reciprocity. We use mutually-exciting Hawkes processes to model the interactions between each (directed) pair of individuals. The intensity of each process allows interactions to arise as responses to opposite interactions (reciprocity), or due to shared interests between individuals (community structure). For sparsity and degree heterogeneity, we build the non-time-dependent part of the intensity function on compound random measures following Todeschini et al., 2016. We conduct experiments on real-world temporal interaction data and show that the proposed model outperforms competing approaches for link prediction, and leads to interpretable parameters.", "bibtex": "@inproceedings{NEURIPS2018_160c8865,\n author = {Miscouridou, Xenia and Caron, Francois and Teh, Yee Whye},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Modelling sparsity, heterogeneity, reciprocity and community structure in temporal interaction data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/160c88652d47d0be60bfbfed25111412-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/160c88652d47d0be60bfbfed25111412-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/160c88652d47d0be60bfbfed25111412-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/160c88652d47d0be60bfbfed25111412-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/160c88652d47d0be60bfbfed25111412-Reviews.html", "metareview": "", "pdf_size": 1425511, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3157293658449719005&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Statistics, University of Oxford; Department of Statistics, University of Oxford; Department of Statistics, University of Oxford + DeepMind", "aff_domain": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "email": "stats.ox.ac.uk;stats.ox.ac.uk;stats.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/160c88652d47d0be60bfbfed25111412-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "University of Oxford;DeepMind", "aff_unique_dep": "Department of Statistics;", "aff_unique_url": "https://www.ox.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Modern Neural Networks Generalize on Small Data Sets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11362", "id": "11362", "author_site": "Matthew Olson, Abraham Wyner, Richard Berk", "author": "Matthew Olson; Abraham Wyner; Richard Berk", "abstract": "In this paper, we use a linear program to empirically decompose fitted neural networks into ensembles of low-bias sub-networks. We show that these sub-networks are relatively uncorrelated, which leads to an internal regularization process, very much like a random forest, which can explain why a neural network is surprisingly resistant to overfitting. 
We then demonstrate this in practice by applying large neural networks, with hundreds of parameters per training observation, to a collection of 116 real-world data sets from the UCI Machine Learning Repository. This collection of data sets contains a much smaller number of training examples than the types of image classification tasks generally studied in the deep learning literature, as well as non-trivial label noise. We show that even in this setting deep neural nets are capable of achieving superior classification accuracy without overfitting.", "bibtex": "@inproceedings{NEURIPS2018_fface838,\n author = {Olson, Matthew and Wyner, Abraham and Berk, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Modern Neural Networks Generalize on Small Data Sets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fface8385abbf94b4593a0ed53a0c70f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fface8385abbf94b4593a0ed53a0c70f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fface8385abbf94b4593a0ed53a0c70f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fface8385abbf94b4593a0ed53a0c70f-Reviews.html", "metareview": "", "pdf_size": 1015859, "gs_citation": 211, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18068544136382398083&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Statistics, Wharton School University of Pennsylvania; Department of Statistics, Wharton School University of Pennsylvania; Department of Statistics, Wharton School University of Pennsylvania", "aff_domain": "wharton.upenn.edu;wharton.upenn.edu;wharton.upenn.edu", "email": "wharton.upenn.edu;wharton.upenn.edu;wharton.upenn.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fface8385abbf94b4593a0ed53a0c70f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Modular Networks: Learning to Decompose Neural Computation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11250", "id": "11250", "author_site": "Louis Kirsch, Julius Kunze, David Barber", "author": "Louis Kirsch; Julius Kunze; David Barber", "abstract": "Scaling model capacity has been vital in the success of deep learning. For a typical network, necessary compute resources and training time grow dramatically with model size. Conditional computation is a promising way to increase the number of parameters with a relatively small increase in resources. We propose a training algorithm that flexibly chooses neural modules based on the data to be processed. Both the decomposition and modules are learned end-to-end. In contrast to existing approaches, training does not rely on regularization to enforce diversity in module use. We apply modular networks both to image recognition and language modeling tasks, where we achieve superior performance compared to several baselines. 
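A minimal modular layer, assuming PyTorch, conveys the kind of conditional computation involved. The hard argmax routing below is only illustrative: the paper learns the controller with a generalized EM procedure rather than by differentiating through the module choice, and the class and attribute names here are placeholders.

```python
import torch
import torch.nn as nn

class ModularLayer(nn.Module):
    def __init__(self, d, n_modules=4):
        super().__init__()
        self.mods = nn.ModuleList(nn.Linear(d, d) for _ in range(n_modules))
        self.controller = nn.Linear(d, n_modules)   # scores modules per input

    def forward(self, x):                           # x: (batch, d)
        choice = self.controller(x).argmax(dim=1)   # data-dependent module pick
        return torch.stack([self.mods[int(c)](row) for c, row in zip(choice, x)])
```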
Introspection reveals that modules specialize in interpretable contexts.", "bibtex": "@inproceedings{NEURIPS2018_310ce61c,\n author = {Kirsch, Louis and Kunze, Julius and Barber, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Modular Networks: Learning to Decompose Neural Computation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/310ce61c90f3a46e340ee8257bc70e93-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/310ce61c90f3a46e340ee8257bc70e93-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/310ce61c90f3a46e340ee8257bc70e93-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/310ce61c90f3a46e340ee8257bc70e93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/310ce61c90f3a46e340ee8257bc70e93-Reviews.html", "metareview": "", "pdf_size": 1083996, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3981583393264042566&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, University College London + IDSIA, The Swiss AI Lab (USI & SUPSI); Department of Computer Science, University College London; Department of Computer Science, University College London", "aff_domain": "louiskirsch.com;gmail.com;ucl.ac.uk", "email": "louiskirsch.com;gmail.com;ucl.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/310ce61c90f3a46e340ee8257bc70e93-Abstract.html", "aff_unique_index": "0+1;0;0", "aff_unique_norm": "University College London;IDSIA", "aff_unique_dep": "Department of Computer Science;The Swiss AI Lab", "aff_unique_url": "https://www.ucl.ac.uk;https://www.idsia.ch", "aff_unique_abbr": "UCL;IDSIA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0+1;0;0", "aff_country_unique": "United Kingdom;Switzerland" }, { "title": "Monte-Carlo Tree Search for Constrained POMDPs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11760", "id": "11760", "author_site": "Jongmin Lee, Geon-Hyeong Kim, Pascal Poupart, Kee-Eung Kim", "author": "Jongmin Lee; Geon-hyeong Kim; Pascal Poupart; Kee-Eung Kim", "abstract": "Monte-Carlo Tree Search (MCTS) has been successfully applied to very large POMDPs, a standard model for stochastic sequential decision-making problems. However, many real-world problems inherently have multiple goals, where multi-objective formulations are more natural. The constrained POMDP (CPOMDP) is such a model that maximizes the reward while constraining the cost, extending the standard POMDP model. To date, solution methods for CPOMDPs assume an explicit model of the environment, and thus are hardly applicable to large-scale real-world problems. In this paper, we present CC-POMCP (Cost-Constrained POMCP), an online MCTS algorithm for large CPOMDPs that leverages the optimization of LP-induced parameters and only requires a black-box simulator of the environment. 
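The LP-induced scalarization can be summarized in a few lines. This is only the outer principle, with illustrative names; CC-POMCP performs the greedy step inside the search tree and adjusts the multiplier as simulations accumulate.

```python
import numpy as np

def select_action(Q_reward, Q_cost, lam):
    # Act greedily on the scalarized value: reward minus priced cost.
    return int(np.argmax(Q_reward - lam * Q_cost))

def update_multiplier(lam, observed_cost, cost_budget, step=0.1):
    # Raise the price of cost when over budget, lower it when under.
    return max(0.0, lam + step * (observed_cost - cost_budget))
```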
In the experiments, we demonstrate that CC-POMCP converges to the optimal stochastic action selection in CPOMDP and pushes the state-of-the-art by being able to scale to very large problems.", "bibtex": "@inproceedings{NEURIPS2018_54c3d58c,\n author = {Lee, Jongmin and Kim, Geon-hyeong and Poupart, Pascal and Kim, Kee-Eung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Monte-Carlo Tree Search for Constrained POMDPs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/54c3d58c5efcf59ddeb7486b7061ea5a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/54c3d58c5efcf59ddeb7486b7061ea5a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/54c3d58c5efcf59ddeb7486b7061ea5a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/54c3d58c5efcf59ddeb7486b7061ea5a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/54c3d58c5efcf59ddeb7486b7061ea5a-Reviews.html", "metareview": "", "pdf_size": 518444, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5477413600490574594&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "School of Computing, KAIST, Republic of Korea; School of Computing, KAIST, Republic of Korea; University of Waterloo, Waterloo AI Institute and Vector Institute; School of Computing, KAIST, Republic of Korea+PROWLER.io", "aff_domain": "ai.kaist.ac.kr;ai.kaist.ac.kr;uwaterloo.ca;cs.kaist.ac.kr", "email": "ai.kaist.ac.kr;ai.kaist.ac.kr;uwaterloo.ca;cs.kaist.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/54c3d58c5efcf59ddeb7486b7061ea5a-Abstract.html", "aff_unique_index": "0;0;1;0+2", "aff_unique_norm": "KAIST;University of Waterloo;PROWLER.io", "aff_unique_dep": "School of Computing;Waterloo AI Institute;", "aff_unique_url": "https://www.kaist.ac.kr;https://uwaterloo.ca;https://prowler.io", "aff_unique_abbr": "KAIST;UWaterloo;PROWLER.io", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Waterloo", "aff_country_unique_index": "0;0;1;0+2", "aff_country_unique": "South Korea;Canada;United Kingdom" }, { "title": "Moonshine: Distilling with Cheap Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11295", "id": "11295", "author_site": "Elliot Crowley, Gavia Gray, Amos Storkey", "author": "Elliot J. Crowley; Gavin Gray; Amos J. Storkey", "abstract": "Many engineers wish to deploy modern neural networks in memory-limited settings; but the development of flexible methods for reducing memory use is in its infancy, and there is little knowledge of the resulting cost-benefit. We propose structural model distillation for memory reduction using a strategy that produces a student architecture that is a simple transformation of the teacher architecture: no redesign is needed, and the same hyperparameters can be used. Using attention transfer, we provide Pareto curves/tables for distillation of residual networks with four benchmark datasets, indicating the memory versus accuracy payoff. 
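For reference, the attention-transfer objective used for distillation reduces to a small loss term. A minimal sketch assuming PyTorch, with beta and the choice of matched layers as hyperparameters:

```python
import torch
import torch.nn.functional as F

def attention_map(feat):                 # feat: (batch, C, H, W)
    a = feat.pow(2).mean(dim=1)          # collapse channels into one map
    return F.normalize(a.flatten(1), dim=1)

def attention_transfer_loss(student_feats, teacher_feats, beta=1e3):
    # Match normalized spatial attention between paired student/teacher layers.
    return beta * sum(
        (attention_map(s) - attention_map(t)).pow(2).mean()
        for s, t in zip(student_feats, teacher_feats)
    )
```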
We show that substantial memory savings are possible with very little loss of accuracy, and confirm that distillation provides student network performance that is better than training that student architecture directly on data.", "bibtex": "@inproceedings{NEURIPS2018_49b8b4f9,\n author = {Crowley, Elliot J. and Gray, Gavin and Storkey, Amos J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Moonshine: Distilling with Cheap Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/49b8b4f95f02e055801da3b4f58e28b7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/49b8b4f95f02e055801da3b4f58e28b7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/49b8b4f95f02e055801da3b4f58e28b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/49b8b4f95f02e055801da3b4f58e28b7-Reviews.html", "metareview": "", "pdf_size": 555832, "gs_citation": 148, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1198937430039662694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "School of Informatics, University of Edinburgh; School of Informatics, University of Edinburgh; School of Informatics, University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "email": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/49b8b4f95f02e055801da3b4f58e28b7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "School of Informatics", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Edinburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Multi-Agent Generative Adversarial Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11718", "id": "11718", "author_site": "Jiaming Song, Hongyu Ren, Dorsa Sadigh, Stefano Ermon", "author": "Jiaming Song; Hongyu Ren; Dorsa Sadigh; Stefano Ermon", "abstract": "Imitation learning algorithms can be used to learn a policy from expert demonstrations without access to a reward signal. However, most existing approaches are not applicable in multi-agent settings due to the existence of multiple (Nash) equilibria and non-stationary environments.\nWe propose a new framework for multi-agent imitation learning for general Markov games, where we build upon a generalized notion of inverse reinforcement learning. We further introduce a practical multi-agent actor-critic algorithm with good empirical performance. Our method can be used to imitate complex behaviors in high-dimensional environments with multiple cooperative or competing agents.", "bibtex": "@inproceedings{NEURIPS2018_240c945b,\n author = {Song, Jiaming and Ren, Hongyu and Sadigh, Dorsa and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Agent Generative Adversarial Imitation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/240c945bb72980130446fc2b40fbb8e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/240c945bb72980130446fc2b40fbb8e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/240c945bb72980130446fc2b40fbb8e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/240c945bb72980130446fc2b40fbb8e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/240c945bb72980130446fc2b40fbb8e0-Reviews.html", "metareview": "", "pdf_size": 2974966, "gs_citation": 302, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16012180030772833830&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/240c945bb72980130446fc2b40fbb8e0-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-Agent Reinforcement Learning via Double Averaging Primal-Dual Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11917", "id": "11917", "author_site": "Hoi-To Wai, Zhuoran Yang, Zhaoran Wang, Mingyi Hong", "author": "Hoi-To Wai; Zhuoran Yang; Zhaoran Wang; Mingyi Hong", "abstract": "Despite the success of single-agent reinforcement learning, multi-agent reinforcement learning (MARL) remains challenging due to complex interactions between agents. Motivated by decentralized applications such as sensor networks, swarm robotics, and power grids, we study policy evaluation in MARL, where agents with jointly observed state-action pairs and private local rewards collaborate to learn the value of a given policy.", "bibtex": "@inproceedings{NEURIPS2018_5a378f84,\n author = {Wai, Hoi-To and Yang, Zhuoran and Wang, Zhaoran and Hong, Mingyi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Agent Reinforcement Learning via Double Averaging Primal-Dual Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5a378f8490c8d6af8647a753812f6e31-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5a378f8490c8d6af8647a753812f6e31-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5a378f8490c8d6af8647a753812f6e31-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5a378f8490c8d6af8647a753812f6e31-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5a378f8490c8d6af8647a753812f6e31-Reviews.html", "metareview": "", "pdf_size": 573741, "gs_citation": 212, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11374200586075903628&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The Chinese University of Hong Kong; Princeton University; Northwestern University; University of Minnesota", "aff_domain": "se.cuhk.edu.hk;princeton.edu;gmail.com;umn.edu", "email": "se.cuhk.edu.hk;princeton.edu;gmail.com;umn.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5a378f8490c8d6af8647a753812f6e31-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Chinese University of Hong Kong;Princeton University;Northwestern University;University of Minnesota", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.princeton.edu;https://www.northwestern.edu;https://www.minnesota.edu", "aff_unique_abbr": "CUHK;Princeton;NU;UMN", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "China;United States" }, { "title": "Multi-Class Learning: From Theory to Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11173", "id": "11173", "author_site": "Jian Li, Yong Liu, Rong Yin, Hua Zhang, Lizhong Ding, Weiping Wang", "author": "Jian Li; Yong Liu; Rong Yin; Hua Zhang; Lizhong Ding; Weiping Wang", "abstract": "In this paper, we study the generalization performance of multi-class classification and obtain a sharper data-dependent generalization error bound with a fast convergence rate, substantially improving the state-of-the-art bounds in existing data-dependent generalization analysis. The theoretical analysis motivates us to devise two effective multi-class kernel learning algorithms with statistical guarantees. Experimental results show that our proposed methods can significantly outperform the existing multi-class classification methods.", "bibtex": "@inproceedings{NEURIPS2018_1141938b,\n author = {Li, Jian and Liu, Yong and Yin, Rong and Zhang, Hua and Ding, Lizhong and Wang, Weiping},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Class Learning: From Theory to Algorithm},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1141938ba2c2b13f5505d7c424ebae5f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1141938ba2c2b13f5505d7c424ebae5f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1141938ba2c2b13f5505d7c424ebae5f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1141938ba2c2b13f5505d7c424ebae5f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1141938ba2c2b13f5505d7c424ebae5f-Reviews.html", "metareview": "", "pdf_size": 369763, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15568729023732421809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Institute of Information Engineering, Chinese Academy of Sciences+School of Cyber Security, University of Chinese Academy of Sciences+National Engineering Research Center for Information Security+National Engineering Laboratory for Information Security Technology; Institute of Information Engineering, Chinese Academy of Sciences; Institute of Information Engineering, Chinese Academy of Sciences+School of Cyber Security, University of Chinese Academy of Sciences; Institute of Information Engineering, Chinese Academy of Sciences; Inception Institute of Artificial Intelligence (IIAI), Abu Dhabi, UAE; Institute of Information Engineering, Chinese Academy of Sciences+National Engineering Research Center for Information Security+National Engineering Laboratory for Information Security Technology", "aff_domain": "iie.ac.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn;inceptioniai.org; ", "email": "iie.ac.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn;inceptioniai.org; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1141938ba2c2b13f5505d7c424ebae5f-Abstract.html", "aff_unique_index": "0+1+2+3;0;0+1;0;4;0+2+3", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;National Engineering Research Center for Information Security;National Engineering Laboratory for Information Security Technology;Inception Institute of Artificial Intelligence", "aff_unique_dep": "Institute of Information Engineering;School of Cyber Security;;;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;;;", "aff_unique_abbr": "CAS;UCAS;;;IIAI", "aff_campus_unique_index": ";;1;", "aff_campus_unique": ";Abu Dhabi", "aff_country_unique_index": "0+0+0+0;0;0+0;0;1;0+0+0", "aff_country_unique": "China;United Arab Emirates" }, { "title": "Multi-Layered Gradient Boosting Decision Trees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11356", "id": "11356", "author_site": "Ji Feng, Yang Yu, Zhi-Hua Zhou", "author": "Ji Feng; Yang Yu; Zhi-Hua Zhou", "abstract": "Multi-layered distributed representation is believed to be the key ingredient of deep neural networks especially in cognitive tasks like computer vision. While non-differentiable models such as gradient boosting decision trees (GBDTs) are still the dominant methods for modeling discrete or tabular data, they are hard to incorporate with such representation learning ability. 
In this work, we propose the multi-layered GBDT forest (mGBDTs), with an explicit emphasis on exploring the ability to learn hierarchical distributed representations by stacking several layers of regression GBDTs as its building blocks. The model can be jointly trained by a variant of target propagation across layers, without requiring backpropagation or differentiability. Experiments confirmed the effectiveness of the model in terms of performance and representation learning ability.", "bibtex": "@inproceedings{NEURIPS2018_39027dfa,\n author = {Feng, Ji and Yu, Yang and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Layered Gradient Boosting Decision Trees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/39027dfad5138c9ca0c474d71db915c3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/39027dfad5138c9ca0c474d71db915c3-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/39027dfad5138c9ca0c474d71db915c3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/39027dfad5138c9ca0c474d71db915c3-Reviews.html", "metareview": "", "pdf_size": 2659268, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16803264774280097122&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "National Key Lab for Novel Software Technology, Nanjing University, China+Sinovation Ventures AI Institute; National Key Lab for Novel Software Technology, Nanjing University, China; National Key Lab for Novel Software Technology, Nanjing University, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/39027dfad5138c9ca0c474d71db915c3-Abstract.html", "aff_unique_index": "0+1;0;0", "aff_unique_norm": "Nanjing University;Sinovation Ventures AI Institute", "aff_unique_dep": "National Key Lab for Novel Software Technology;AI Institute", "aff_unique_url": "http://www.nju.edu.cn;https://www.sinovationventures.com/", "aff_unique_abbr": "Nanjing U;SVAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "China" }, { "title": "Multi-Task Learning as Multi-Objective Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11076", "id": "11076", "author_site": "Ozan Sener, Vladlen Koltun", "author": "Ozan Sener; Vladlen Koltun", "abstract": "In multi-task learning, multiple tasks are solved jointly, sharing inductive bias between them. Multi-task learning is inherently a multi-objective problem because different tasks may conflict, necessitating a trade-off. A common compromise is to optimize a proxy objective that minimizes a weighted linear combination of per-task losses. However, this workaround is only valid when the tasks do not compete, which is rarely the case. In this paper, we explicitly cast multi-task learning as multi-objective optimization, with the overall objective of finding a Pareto optimal solution. To this end, we use algorithms developed in the gradient-based multi-objective optimization literature. 
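For two tasks, the min-norm step from that literature (MGDA) has a closed form: take the convex combination of the two task gradients with the smallest norm, which is a common descent direction for both objectives. A minimal sketch of that closed form follows; making the general many-task case scale to deep networks is what the proposed upper bound addresses.

```python
import numpy as np

def min_norm_direction(g1, g2):
    # alpha* = ((g2 - g1) . g2) / ||g1 - g2||^2, clipped to [0, 1]
    denom = float((g1 - g2) @ (g1 - g2))
    alpha = 0.5 if denom == 0.0 else float((g2 - g1) @ g2) / denom
    alpha = min(max(alpha, 0.0), 1.0)
    return alpha * g1 + (1.0 - alpha) * g2   # descent direction for both tasks
```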
These algorithms are not directly applicable to large-scale learning problems since they scale poorly with the dimensionality of the gradients and the number of tasks. We therefore propose an upper bound for the multi-objective loss and show that it can be optimized efficiently. We further prove that optimizing this upper bound yields a Pareto optimal solution under realistic assumptions. We apply our method to a variety of multi-task deep learning problems including digit classification, scene understanding (joint semantic segmentation, instance segmentation, and depth estimation), and multi-label classification. Our method produces higher-performing models than recent multi-task learning formulations or per-task training.", "bibtex": "@inproceedings{NEURIPS2018_432aca3a,\n author = {Sener, Ozan and Koltun, Vladlen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Task Learning as Multi-Objective Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/432aca3a1e345e339f35a30c8f65edce-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/432aca3a1e345e339f35a30c8f65edce-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/432aca3a1e345e339f35a30c8f65edce-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/432aca3a1e345e339f35a30c8f65edce-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/432aca3a1e345e339f35a30c8f65edce-Reviews.html", "metareview": "", "pdf_size": 843064, "gs_citation": 1633, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7092916310292802870&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Intel Labs; Intel Labs", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/432aca3a1e345e339f35a30c8f65edce-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Labs", "aff_unique_url": "https://www.intel.com", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multi-Task Zipping via Layer-wise Neuron Sharing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11584", "id": "11584", "author_site": "Xiaoxi He, Zimu Zhou, Lothar Thiele", "author": "Xiaoxi He; Zimu Zhou; Lothar Thiele", "abstract": "Future mobile devices are anticipated to perceive, understand and react to the world on their own by running multiple correlated deep neural networks on-device. Yet the complexity of these neural networks needs to be trimmed down both within-model and cross-model to fit in mobile storage and memory. Previous studies focus on squeezing the redundancy within a single neural network. In this work, we aim to reduce the redundancy across multiple models. We propose Multi-Task Zipping (MTZ), a framework to automatically merge correlated, pre-trained deep neural networks for cross-model compression. Central in MTZ is a layer-wise neuron sharing and incoming weight updating scheme that induces a minimal change in the error function. MTZ inherits information from each model and demands light retraining to re-boost the accuracy of individual tasks. 
Evaluations show that MTZ is able to fully merge the hidden layers of two VGG-16 networks with a 3.18% increase in the test error averaged on ImageNet and CelebA, or share 39.61% parameters between the two networks with <0.5% increase in the test errors for both tasks. The number of iterations to retrain the combined network is at least 17.8 times lower than that of training a single VGG-16 network. Moreover, experiments show that MTZ is also able to effectively merge multiple residual networks.", "bibtex": "@inproceedings{NEURIPS2018_ad8e88c0,\n author = {He, Xiaoxi and Zhou, Zimu and Thiele, Lothar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-Task Zipping via Layer-wise Neuron Sharing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ad8e88c0f76fa4fc8e5474384142a00a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ad8e88c0f76fa4fc8e5474384142a00a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ad8e88c0f76fa4fc8e5474384142a00a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ad8e88c0f76fa4fc8e5474384142a00a-Reviews.html", "metareview": "", "pdf_size": 230245, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1961977466770047377&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "ETH Zurich; ETH Zurich; ETH Zurich", "aff_domain": "ethz.ch;tik.ee.ethz.ch;ethz.ch", "email": "ethz.ch;tik.ee.ethz.ch;ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ad8e88c0f76fa4fc8e5474384142a00a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Multi-View Silhouette and Depth Decomposition for High Resolution 3D Object Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11626", "id": "11626", "author_site": "Edward Smith, Scott Fujimoto, David Meger", "author": "Edward Smith; Scott Fujimoto; David Meger", "abstract": "We consider the problem of scaling deep generative shape models to high-resolution. Drawing motivation from the canonical view representation of objects, we introduce a novel method for the fast up-sampling of 3D objects in voxel space through networks that perform super-resolution on the six orthographic depth projections. This allows us to generate high-resolution objects with more efficient scaling than methods which work directly in 3D. We decompose the problem of 2D depth super-resolution into silhouette and depth prediction to capture both structure and fine detail. This allows our method to generate sharp edges more easily than an individual network. We evaluate our work on multiple experiments concerning high-resolution 3D objects, and show our system is capable of accurately predicting novel objects at resolutions as large as 512x512x512 -- the highest resolution reported for this task. 
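Extracting the six orthographic depth projections that the method super-resolves is straightforward. A minimal sketch assuming a cubic binary occupancy array; the silhouette of each view is simply the set of finite entries of its depth map.

```python
import numpy as np

def orthographic_depth_maps(vox):        # vox: (N, N, N) boolean occupancy
    maps = []
    for axis in range(3):
        for flipped in (False, True):    # view from both ends of each axis
            v = np.flip(vox, axis=axis) if flipped else vox
            hit = v.any(axis=axis)       # silhouette: does the ray hit at all?
            depth = v.argmax(axis=axis).astype(float)  # first occupied voxel
            depth[~hit] = np.inf         # rays that miss the object entirely
            maps.append(depth)
    return maps                          # six (N, N) depth maps
```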
We achieve state-of-the-art performance on 3D object reconstruction from RGB images on the ShapeNet dataset, and further demonstrate the first effective 3D super-resolution method.", "bibtex": "@inproceedings{NEURIPS2018_39ae2ed1,\n author = {Smith, Edward and Fujimoto, Scott and Meger, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-View Silhouette and Depth Decomposition for High Resolution 3D Object Representation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/39ae2ed11b14a4ccb41d35e9d1ba5d11-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/39ae2ed11b14a4ccb41d35e9d1ba5d11-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/39ae2ed11b14a4ccb41d35e9d1ba5d11-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/39ae2ed11b14a4ccb41d35e9d1ba5d11-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/39ae2ed11b14a4ccb41d35e9d1ba5d11-Reviews.html", "metareview": "", "pdf_size": 5507868, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17233245625263547556&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "McGill University; McGill University; McGill University", "aff_domain": "mail.mcgill.ca;mail.mcgill.ca;cim.mcgill.ca", "email": "mail.mcgill.ca;mail.mcgill.ca;cim.mcgill.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/39ae2ed11b14a4ccb41d35e9d1ba5d11-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Multi-armed Bandits with Compensation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11500", "id": "11500", "author_site": "Siwei Wang, Longbo Huang", "author": "Siwei Wang; Longbo Huang", "abstract": "We propose and study the known-compensation multi-arm bandit (KCMAB) problem, where a system controller offers a set of arms to many short-term players for $T$ steps. In each step, one short-term player arrives to the system. Upon arrival, the player greedily selects an arm with the current best average reward and receives a stochastic reward associated with the arm. In order to incentivize players to explore other arms, the controller provides proper payment compensation to players. The objective of the controller is to maximize the total reward collected by players while minimizing the compensation. We first give a compensation lower bound $\\Theta(\\sum_i {\\Delta_i\\log T\\over KL_i})$, where $\\Delta_i$ and $KL_i$ are the expected reward gap and Kullback-Leibler (KL) divergence between distributions of arm $i$ and the best arm, respectively. We then analyze three algorithms to solve the KCMAB problem, and obtain their regrets and compensations. We show that the algorithms all achieve $O(\\log T)$ regret and $O(\\log T)$ compensation that match the theoretical lower bound. 
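At a high level, the compensation mechanics can be sketched in a few lines. The names below are illustrative and this mirrors the UCB-style variant only loosely; see the paper for the exact schemes behind the $O(\log T)$ guarantees.

```python
import numpy as np

def choose_and_compensate(emp_means, counts, t):
    # Controller targets the UCB arm; a myopic player would pick the greedy arm.
    ucb = emp_means + np.sqrt(2.0 * np.log(t + 1) / np.maximum(counts, 1))
    target, greedy = int(np.argmax(ucb)), int(np.argmax(emp_means))
    # Paying the empirical-mean gap makes the target arm acceptable to the player.
    pay = emp_means[greedy] - emp_means[target] if target != greedy else 0.0
    return target, pay
```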
Finally, we use experiments to show the behavior of these algorithms.", "bibtex": "@inproceedings{NEURIPS2018_8bdb5058,\n author = {Wang, Siwei and Huang, Longbo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-armed Bandits with Compensation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8bdb5058376143fa358981954e7626b8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8bdb5058376143fa358981954e7626b8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8bdb5058376143fa358981954e7626b8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8bdb5058376143fa358981954e7626b8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8bdb5058376143fa358981954e7626b8-Reviews.html", "metareview": "", "pdf_size": 472364, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15873207456649639876&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "IIIS, Tsinghua University; IIIS, Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8bdb5058376143fa358981954e7626b8-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Multi-domain Causal Structure Learning in Linear Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11607", "id": "11607", "author_site": "AmirEmad Ghassami, Negar Kiyavash, Biwei Huang, Kun Zhang", "author": "AmirEmad Ghassami; Negar Kiyavash; Biwei Huang; Kun Zhang", "abstract": "We study the problem of causal structure learning in linear systems from observational data given in multiple domains, across which the causal coefficients and/or the distribution of the exogenous noises may vary. The main tool used in our approach is the principle that in a causally sufficient system, the causal modules, as well as their included parameters, change independently across domains. We first introduce our approach for finding causal direction in a system comprising two variables and propose efficient methods for identifying causal direction. Then we generalize our methods to causal structure learning in networks of variables. Most previous work on structure learning from multi-domain data assumes that certain types of invariance hold in causal modules across domains. Our approach unifies the ideas in those works and generalizes to the case in which there is no such invariance across domains. Our proposed methods are generally capable of identifying causal direction from fewer than ten domains. When the invariance property holds, two domains are generally sufficient.", "bibtex": "@inproceedings{NEURIPS2018_6ad4174e,\n author = {Ghassami, AmirEmad and Kiyavash, Negar and Huang, Biwei and Zhang, Kun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-domain Causal Structure Learning in Linear Systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6ad4174eba19ecb5fed17411a34ff5e6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6ad4174eba19ecb5fed17411a34ff5e6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6ad4174eba19ecb5fed17411a34ff5e6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6ad4174eba19ecb5fed17411a34ff5e6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6ad4174eba19ecb5fed17411a34ff5e6-Reviews.html", "metareview": "", "pdf_size": 624305, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14580132806835756596&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of ECE, University of Illinois at Urbana-Champaign, Urbana, IL, USA; School of ISyE and ECE, Georgia Institute of Technology, Atlanta, GA, USA; Department of Philosophy, Carnegie Mellon University, Pittsburgh, PA, USA; Department of Philosophy, Carnegie Mellon University, Pittsburgh, PA, USA", "aff_domain": "illinois.edu; ; ; ", "email": "illinois.edu; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6ad4174eba19ecb5fed17411a34ff5e6-Abstract.html", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Georgia Institute of Technology;Carnegie Mellon University", "aff_unique_dep": "Department of Electrical and Computer Engineering;School of Industrial and Systems Engineering and Electrical and Computer Engineering;Department of Philosophy", "aff_unique_url": "https://illinois.edu;https://www.gatech.edu;https://www.cmu.edu", "aff_unique_abbr": "UIUC;Georgia Tech;CMU", "aff_campus_unique_index": "0;1;2;2", "aff_campus_unique": "Urbana;Atlanta;Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-objective Maximization of Monotone Submodular Functions with Cardinality Constraint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11903", "id": "11903", "author": "Rajan Udwani", "abstract": "We consider the problem of multi-objective maximization of monotone submodular functions subject to a cardinality constraint, often formulated as $\\max_{|A|=k}\\min_{i\\in\\{1,\\dots,m\\}}f_i(A)$. While it is widely known that greedy methods work well for a single objective, the problem becomes much harder with multiple objectives. In fact, Krause et al.\\ (2008) showed that when the number of objectives $m$ grows as the cardinality $k$, i.e., $m=\\Omega(k)$, the problem is inapproximable (unless $P=NP$). On the other hand, when $m$ is constant, Chekuri et al.\\ (2010) showed a randomized $(1-1/e)-\\epsilon$ approximation with runtime (number of queries to function oracle) $n^{m/\\epsilon^3}$.\n\t\n\tWe focus on finding a fast and practical algorithm that has (asymptotic) approximation guarantees even when $m$ is super constant. We first modify the algorithm of Chekuri et al.\\ (2010) to achieve a $(1-1/e)$ approximation for $m=o(\\frac{k}{\\log^3 k})$. 
This demonstrates a steep transition from constant-factor approximability to inapproximability around $m=\\Omega(k)$. Then, using Multiplicative-Weight-Updates (MWU), we find a much faster $\\tilde{O}(n/\\delta^3)$ time asymptotic $(1-1/e)^2-\\delta$ approximation. While the above results are all randomized, we also give a simple deterministic $(1-1/e)-\\epsilon$ approximation with runtime $kn^{m/\\epsilon^4}$. Finally, we run synthetic experiments using Kronecker graphs and find that our MWU-inspired heuristic outperforms existing heuristics.", "bibtex": "@inproceedings{NEURIPS2018_7e448ed9,\n author = {Udwani, Rajan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-objective Maximization of Monotone Submodular Functions with Cardinality Constraint},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7e448ed9dd44e6e22442dac8e21856ae-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7e448ed9dd44e6e22442dac8e21856ae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7e448ed9dd44e6e22442dac8e21856ae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7e448ed9dd44e6e22442dac8e21856ae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7e448ed9dd44e6e22442dac8e21856ae-Reviews.html", "metareview": "", "pdf_size": 540152, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7522397493376201967&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Operations Research Center, M.I.T.", "aff_domain": "alum.mit.edu", "email": "alum.mit.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7e448ed9dd44e6e22442dac8e21856ae-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Operations Research Center", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Multi-value Rule Sets for Interpretable Classification with Feature-Efficient Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12024", "id": "12024", "author": "Tong Wang", "abstract": "We present the Multi-value Rule Set (MRS) for interpretable\nclassification with feature-efficient representations. Compared to\nrule sets built from single-value rules, MRS adopts a more\ngeneralized form of association rules that allows multiple values\nin a condition. 
Rules of this form are more concise than classical\nsingle-value rules in capturing and describing patterns in data.\nOur formulation also pursues a higher efficiency of feature utilization,\nwhich reduces possible cost in data collection and storage.\nWe propose a Bayesian framework for formulating an MRS model\nand develop an efficient inference method for learning a maximum\na posteriori, incorporating theoretically grounded bounds to iteratively\nreduce the search space and improve the search efficiency.\nExperiments on synthetic and real-world data demonstrate that\nMRS models have significantly smaller complexity and fewer features\nthan baseline models while being competitive in predictive\naccuracy.", "bibtex": "@inproceedings{NEURIPS2018_32bbf7b2,\n author = {Wang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-value Rule Sets for Interpretable Classification with Feature-Efficient Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/32bbf7b2bc4ed14eb1e9c2580056a989-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/32bbf7b2bc4ed14eb1e9c2580056a989-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/32bbf7b2bc4ed14eb1e9c2580056a989-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/32bbf7b2bc4ed14eb1e9c2580056a989-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/32bbf7b2bc4ed14eb1e9c2580056a989-Reviews.html", "metareview": "", "pdf_size": 179426, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13805737803480413432&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Tippie School of Business, University of Iowa, Iowa City, IA 52242", "aff_domain": "uiowa.edu", "email": "uiowa.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/32bbf7b2bc4ed14eb1e9c2580056a989-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University of Iowa", "aff_unique_dep": "Tippie School of Business", "aff_unique_url": "https://tippie.uiowa.edu", "aff_unique_abbr": "UIowa", "aff_campus_unique_index": "0", "aff_campus_unique": "Iowa City", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Multilingual Anchoring: Interactive Topic Modeling and Alignment Across Languages", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11827", "id": "11827", "author_site": "Michelle Yuan, Benjamin Van Durme, Jordan Boyd-Graber", "author": "Michelle Yuan; Benjamin Van Durme; Jordan L Ying", "abstract": "Multilingual topic models can reveal patterns in cross-lingual document collections. However, existing models lack speed and interactivity, which prevents adoption in everyday corpora exploration or quick moving situations (e.g., natural disasters, political instability). First, we propose a multilingual anchoring algorithm that builds an anchor-based topic model for documents in different languages. Then, we incorporate interactivity to develop MTAnchor (Multilingual Topic Anchors), a system that allows users to refine the topic model. We test our algorithms on labeled English, Chinese, and Sinhalese documents. 
Within minutes, our methods can produce interpretable topics that are useful for specific classification tasks.", "bibtex": "@inproceedings{NEURIPS2018_28b9f8aa,\n author = {Yuan, Michelle and Van Durme, Benjamin and Ying, Jordan L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multilingual Anchoring: Interactive Topic Modeling and Alignment Across Languages},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/28b9f8aa9f07db88404721af4a5b6c11-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/28b9f8aa9f07db88404721af4a5b6c11-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/28b9f8aa9f07db88404721af4a5b6c11-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/28b9f8aa9f07db88404721af4a5b6c11-Reviews.html", "metareview": "", "pdf_size": 761120, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12128120271299187435&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Maryland; Johns Hopkins University; University of Maryland", "aff_domain": "cs.umd.edu;jhu.edu;umiacs.umd.edu", "email": "cs.umd.edu;jhu.edu;umiacs.umd.edu", "github": "http://github.com/forest-snow/mtanchor_demo", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/28b9f8aa9f07db88404721af4a5b6c11-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Maryland;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umd.edu;https://www.jhu.edu", "aff_unique_abbr": "UMD;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Multimodal Generative Models for Scalable Weakly-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11544", "id": "11544", "author_site": "Mike Wu, Noah Goodman", "author": "Mike Wu; Noah Goodman", "abstract": "Multiple modalities often co-occur when describing natural phenomena. Learning a joint representation of these modalities should yield deeper and more useful representations. Previous generative approaches to multi-modal input either do not learn a joint distribution or require additional computation to handle missing data. Here, we introduce a multimodal variational autoencoder (MVAE) that uses a product-of-experts inference network and a sub-sampled training paradigm to solve the multi-modal inference problem. Notably, our model shares parameters to efficiently learn under any combination of missing modalities. We apply the MVAE on four datasets and match state-of-the-art performance using many fewer parameters. In addition, we show that the MVAE is directly applicable to weakly-supervised learning, and is robust to incomplete supervision. We then consider two case studies, one of learning image transformations---edge detection, colorization, segmentation---as a set of modalities, followed by one of machine translation between two languages. We find appealing results across this range of tasks.", "bibtex": "@inproceedings{NEURIPS2018_1102a326,\n author = {Wu, Mike and Goodman, Noah},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multimodal Generative Models for Scalable Weakly-Supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1102a326d5f7c9e04fc3c89d0ede88c9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1102a326d5f7c9e04fc3c89d0ede88c9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1102a326d5f7c9e04fc3c89d0ede88c9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1102a326d5f7c9e04fc3c89d0ede88c9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1102a326d5f7c9e04fc3c89d0ede88c9-Reviews.html", "metareview": "", "pdf_size": 2467152, "gs_citation": 498, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3003516605424055081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Stanford University; Departments of Computer Science and Psychology, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1102a326d5f7c9e04fc3c89d0ede88c9-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multiple Instance Learning for Efficient Sequential Data Classification on Resource-constrained Devices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12035", "id": "12035", "author_site": "Don Dennis, Chirag Pabbaraju, Harsha Vardhan Simhadri, Prateek Jain", "author": "Don Dennis; Chirag Pabbaraju; Harsha Vardhan Simhadri; Prateek Jain", "abstract": "We study the problem of fast and efficient classification of sequential data (such as\ntime-series) on tiny devices, which is critical for various IoT related applications\nlike audio keyword detection or gesture detection. Such tasks are cast as a standard classification task by sliding windows over the data stream to construct data points. Deploying such classification modules on tiny devices is challenging as predictions over sliding windows of data need to be invoked continuously at a high frequency. Each such predictor instance in itself is expensive as it evaluates large models over long windows of data. In this paper, we address this challenge by exploiting the following two observations about classification tasks arising in typical IoT related applications: (a) the \"signature\" of a particular class (e.g. an audio keyword) typically occupies a small fraction of the overall data, and (b) class signatures tend to be discernible early on in the data. We propose a method, EMI-RNN, that exploits these observations by using a multiple instance learning formulation along with an early prediction technique to learn a model that achieves better accuracy compared to baseline models, while simultaneously reducing computation by a large fraction. For instance, on a gesture detection benchmark [ 25 ], EMI-RNN improves standard LSTM model\u2019s accuracy by up to 1% while requiring 72x less computation. 
This enables us to deploy such models for continuous real-time prediction on small devices such as the Raspberry Pi0 and Arduino variants, a task that the baseline LSTM could not achieve. Finally, we also provide an analysis of our multiple instance learning algorithm in a simple setting and show that the proposed algorithm converges to the global optimum at a linear rate, one of the first such results in this domain. The code for EMI-RNN is available at: https://github.com/Microsoft/EdgeML/tree/master/tf/examples/EMI-RNN", "bibtex": "@inproceedings{NEURIPS2018_d9fbed9d,\n author = {Dennis, Don and Pabbaraju, Chirag and Simhadri, Harsha Vardhan and Jain, Prateek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multiple Instance Learning for Efficient Sequential Data Classification on Resource-constrained Devices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d9fbed9da256e344c1fa46bb46c34c5f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d9fbed9da256e344c1fa46bb46c34c5f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d9fbed9da256e344c1fa46bb46c34c5f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d9fbed9da256e344c1fa46bb46c34c5f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d9fbed9da256e344c1fa46bb46c34c5f-Reviews.html", "metareview": "", "pdf_size": 486852, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1777978474704177693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Microsoft Research, India; Microsoft Research, India; Microsoft Research, India; Microsoft Research, India", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d9fbed9da256e344c1fa46bb46c34c5f-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/india.aspx", "aff_unique_abbr": "MSR India", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "title": "Multiple-Step Greedy Policies in Approximate and Online Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11512", "id": "11512", "author_site": "Yonathan Efroni, Gal Dalal, Bruno Scherrer, Shie Mannor", "author": "Yonathan Efroni; Gal Dalal; Bruno Scherrer; Shie Mannor", "abstract": "Multiple-step lookahead policies have demonstrated high empirical competence in Reinforcement Learning, via the use of Monte Carlo Tree Search or Model Predictive Control. In a recent work (Efroni et al., 2018), multiple-step greedy policies and their use in vanilla Policy Iteration algorithms were proposed and analyzed. In this work, we study multiple-step greedy algorithms in more practical setups. 
We begin by highlighting a counter-intuitive difficulty, arising with soft-policy updates: even in the absence of approximations, and contrary to the 1-step-greedy case, monotonic policy improvement is not guaranteed unless the update stepsize is sufficiently large. Taking particular care about this difficulty, we formulate and analyze online and approximate algorithms that use such a multi-step greedy operator.", "bibtex": "@inproceedings{NEURIPS2018_3f998e71,\n author = {Efroni, Yonathan and Dalal, Gal and Scherrer, Bruno and Mannor, Shie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multiple-Step Greedy Policies in Approximate and Online Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3f998e713a6e02287c374fd26835d87e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3f998e713a6e02287c374fd26835d87e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3f998e713a6e02287c374fd26835d87e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3f998e713a6e02287c374fd26835d87e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3f998e713a6e02287c374fd26835d87e-Reviews.html", "metareview": "", "pdf_size": 323000, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17322108500862768700&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Department of Electrical Engineering, Technion, Israel Institute of Technology; Department of Electrical Engineering, Technion, Israel Institute of Technology; INRIA, Villers-l\u00e8s-Nancy, France; Department of Electrical Engineering, Technion, Israel Institute of Technology", "aff_domain": "gmail.com;campus.technion.ac.il;inria.fr;ee.technion.ac.il", "email": "gmail.com;campus.technion.ac.il;inria.fr;ee.technion.ac.il", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3f998e713a6e02287c374fd26835d87e-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Technion - Israel Institute of Technology;INRIA", "aff_unique_dep": "Department of Electrical Engineering;", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.inria.fr", "aff_unique_abbr": "Technion;INRIA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Villers-l\u00e8s-Nancy", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Israel;France" }, { "title": "Multiplicative Weights Updates with Constant Step-Size in Graphical Constant-Sum Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11354", "id": "11354", "author": "Yun Kuen Cheung", "abstract": "Since Multiplicative Weights (MW) updates are the discrete analogue of the continuous Replicator Dynamics (RD), some researchers had expected their qualitative behaviours would be similar. We show that this is false in the context of graphical constant-sum games, which include two-person zero-sum games as special cases. In such games which have a fully-mixed Nash Equilibrium (NE), it was known that RD satisfy the permanence and Poincar\u00e9 recurrence properties, but we show that MW updates with any constant step-size $\\epsilon > 0$ converge to the boundary of the state space, and thus do not satisfy the two properties. 
Using this result, we show that MW updates have a regret lower bound of $\\Omega(1/(\\epsilon T))$, while it was known that the regret of RD is upper bounded by $O(1/T)$.", "bibtex": "@inproceedings{NEURIPS2018_90e13578,\n author = {Cheung, Yun Kuen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multiplicative Weights Updates with Constant Step-Size in Graphical Constant-Sum Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/90e1357833654983612fb05e3ec9148c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/90e1357833654983612fb05e3ec9148c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/90e1357833654983612fb05e3ec9148c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/90e1357833654983612fb05e3ec9148c-Reviews.html", "metareview": "", "pdf_size": 372287, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2016042545289515751&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Singapore University of Technology and Design + Max-Planck Institute for Informatics, Saarland Informatics Campus", "aff_domain": "sutd.edu.sg", "email": "sutd.edu.sg", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/90e1357833654983612fb05e3ec9148c-Abstract.html", "aff_unique_index": "0+1", "aff_unique_norm": "Singapore University of Technology and Design;Max-Planck Institute for Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://mpi-inf.mpg.de", "aff_unique_abbr": "SUTD;MPII", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saarland", "aff_country_unique_index": "0+1", "aff_country_unique": "Singapore;Germany" }, { "title": "Multitask Boosting for Survival Analysis with Competing Risks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11155", "id": "11155", "author_site": "Alexis Bellot, Mihaela van der Schaar", "author": "Alexis Bellot; Mihaela van der Schaar", "abstract": "The co-occurrence of multiple diseases among the general population is an important problem as those patients have more risk of complications and represent a large share of health care expenditure. Learning to predict time-to-event probabilities for these patients is a challenging problem because the risks of events are correlated (there are competing risks) with often only a few patients experiencing individual events of interest, and of those only a fraction are actually observed in the data. We introduce in this paper a survival model with the flexibility to leverage a common representation of related events that is designed to correct for the strong imbalance in observed outcomes. The procedure is sequential: outcome-specific survival distributions form the components of nonparametric multivariate estimators which we combine into an ensemble in such a way as to ensure accurate predictions on all outcome types simultaneously. Our algorithm is general and represents the first boosting-like method for time-to-event data with multiple outcomes. 
We demonstrate the performance of our algorithm on synthetic and real data.", "bibtex": "@inproceedings{NEURIPS2018_2afe4567,\n author = {Bellot, Alexis and van der Schaar, Mihaela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multitask Boosting for Survival Analysis with Competing Risks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2afe4567e1bf64d32a5527244d104cea-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2afe4567e1bf64d32a5527244d104cea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2afe4567e1bf64d32a5527244d104cea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2afe4567e1bf64d32a5527244d104cea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2afe4567e1bf64d32a5527244d104cea-Reviews.html", "metareview": "", "pdf_size": 491308, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=180710140011701408&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Oxford; University of Oxford + The Alan Turing Institute", "aff_domain": "eng.ox.ac.uk;turing.ac.uk", "email": "eng.ox.ac.uk;turing.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2afe4567e1bf64d32a5527244d104cea-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "University of Oxford;Alan Turing Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.ox.ac.uk;https://www.turing.ac.uk", "aff_unique_abbr": "Oxford;ATI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Multivariate Convolutional Sparse Coding for Electromagnetic Brain Signals", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11332", "id": "11332", "author_site": "Tom Dupr\u00e9 la Tour, Thomas Moreau, Mainak Jas, Alexandre Gramfort", "author": "Tom Dupr\u00e9 la Tour; Thomas Moreau; Mainak Jas; Alexandre Gramfort", "abstract": "Frequency-specific patterns of neural activity are traditionally interpreted as sustained rhythmic oscillations, and related to cognitive mechanisms such as attention, high level visual processing or motor control. While alpha waves (8--12\\,Hz) are known to closely resemble short sinusoids, and thus are revealed by Fourier analysis or wavelet transforms, there is an evolving debate that electromagnetic neural signals are composed of more complex waveforms that cannot be analyzed by linear filters and traditional signal representations. In this paper, we propose to learn dedicated representations of such recordings using a multivariate convolutional sparse coding (CSC) algorithm. Applied to electroencephalography (EEG) or magnetoencephalography (MEG) data, this method is able to learn not only prototypical temporal waveforms, but also associated spatial patterns so their origin can be localized in the brain. Our algorithm is based on alternated minimization and a greedy coordinate descent solver that leads to state-of-the-art running time on long time series. To demonstrate the implications of this method, we apply it to MEG data and show that it is able to recover biological artifacts. 
More remarkably, our approach also reveals the presence of non-sinusoidal mu-shaped patterns, along with their topographic maps related to the somatosensory cortex.", "bibtex": "@inproceedings{NEURIPS2018_64f1f27b,\n author = {Dupr\\'{e} la Tour, Tom and Moreau, Thomas and Jas, Mainak and Gramfort, Alexandre},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multivariate Convolutional Sparse Coding for Electromagnetic Brain Signals},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/64f1f27bf1b4ec22924fd0acb550c235-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/64f1f27bf1b4ec22924fd0acb550c235-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/64f1f27bf1b4ec22924fd0acb550c235-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/64f1f27bf1b4ec22924fd0acb550c235-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/64f1f27bf1b4ec22924fd0acb550c235-Reviews.html", "metareview": "", "pdf_size": 1596948, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2397933842194531538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/64f1f27bf1b4ec22924fd0acb550c235-Abstract.html" }, { "title": "Multivariate Time Series Imputation with Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11174", "id": "11174", "author_site": "Yonghong Luo, Xiangrui Cai, Ying ZHANG, Jun Xu, Yuan xiaojie", "author": "Yonghong Luo; Xiangrui Cai; Ying ZHANG; Jun Xu; Yuan xiaojie", "abstract": "Multivariate time series usually contain a large number of missing values, which hinders the application of advanced analysis methods on multivariate time series data. Conventional approaches to addressing the challenge of missing values, including mean/zero imputation, case deletion, and matrix factorization-based imputation, are all incapable of modeling the temporal dependencies and the nature of complex distribution in multivariate time series. In this paper, we treat the problem of missing value imputation as data generation. Inspired by the success of Generative Adversarial Networks (GAN) in image generation, we propose to learn the overall distribution of a multivariate time series dataset with GAN, which is further used to generate the missing values for each sample. Different from image data, time series data are usually incomplete due to the nature of the data recording process. A modified Gated Recurrent Unit is employed in GAN to model the temporal irregularity of the incomplete time series. Experiments on two multivariate time series datasets show that the proposed model outperforms the baselines in terms of accuracy of imputation. 
Experimental results also show that a simple model on the imputed data can achieve state-of-the-art results on the prediction tasks, demonstrating the benefits of our model in downstream applications.", "bibtex": "@inproceedings{NEURIPS2018_96b9bff0,\n author = {Luo, Yonghong and Cai, Xiangrui and ZHANG, Ying and Xu, Jun and xiaojie, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multivariate Time Series Imputation with Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/96b9bff013acedfb1d140579e2fbeb63-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/96b9bff013acedfb1d140579e2fbeb63-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/96b9bff013acedfb1d140579e2fbeb63-Metadata.json", "review": "", "metareview": "", "pdf_size": 1281063, "gs_citation": 673, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8869542471030936619&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "College of Computer Science, Nankai University, Tianjin, China; College of Computer Science, Nankai University, Tianjin, China; College of Computer Science, Nankai University, Tianjin, China; School of Information, Renmin University of China, Beijing, China; College of Computer Science, Nankai University, Tianjin, China", "aff_domain": "dbis.nankai.edu.cn;dbis.nankai.edu.cn;nankai.edu.cn;ruc.edu.cn;nankai.edu.cn", "email": "dbis.nankai.edu.cn;dbis.nankai.edu.cn;nankai.edu.cn;ruc.edu.cn;nankai.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/96b9bff013acedfb1d140579e2fbeb63-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Nankai University;Renmin University of China", "aff_unique_dep": "College of Computer Science;School of Information", "aff_unique_url": "http://www.nankai.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "Nankai;RUC", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Tianjin;Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "NAIS-Net: Stable Deep Networks from Non-Autonomous Differential Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11308", "id": "11308", "author_site": "Marco Ciccone, Marco Gallieri, Jonathan Masci, Christian Osendorfer, Faustino Gomez", "author": "Marco Ciccone; Marco Gallieri; Jonathan Masci; Christian Osendorfer; Faustino Gomez", "abstract": "This paper introduces Non-Autonomous Input-Output Stable Network (NAIS-Net), a very deep architecture where each stacked processing block is derived from a time-invariant non-autonomous dynamical system. Non-autonomy is implemented by skip connections from the block input to each of the unrolled processing stages and allows stability to be enforced so that blocks can be unrolled adaptively to a pattern-dependent processing depth. NAIS-Net induces non-trivial, Lipschitz input-output maps, even for an infinite unroll length. We prove that the network is globally asymptotically stable so that for every initial condition there is exactly one input-dependent equilibrium assuming tanh units, and multiple stable equilibria for ReLU units. 
An efficient implementation that enforces the stability under derived conditions for both fully-connected and convolutional layers is also presented. Experimental results show how NAIS-Net exhibits stability in practice, yielding a significant reduction in generalization gap compared to ResNets.", "bibtex": "@inproceedings{NEURIPS2018_7bd28f15,\n author = {Ciccone, Marco and Gallieri, Marco and Masci, Jonathan and Osendorfer, Christian and Gomez, Faustino},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {NAIS-Net: Stable Deep Networks from Non-Autonomous Differential Equations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7bd28f15a49d5e5848d6ec70e584e625-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7bd28f15a49d5e5848d6ec70e584e625-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7bd28f15a49d5e5848d6ec70e584e625-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7bd28f15a49d5e5848d6ec70e584e625-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7bd28f15a49d5e5848d6ec70e584e625-Reviews.html", "metareview": "", "pdf_size": 2842901, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17819458422043130038&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Politecnico di Milano+NNAISENSE SA; NNAISENSE SA; NNAISENSE SA; NNAISENSE SA; NNAISENSE SA", "aff_domain": "polimi.it;nnaisense.com;nnaisense.com;nnaisense.com;nnaisense.com", "email": "polimi.it;nnaisense.com;nnaisense.com;nnaisense.com;nnaisense.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7bd28f15a49d5e5848d6ec70e584e625-Abstract.html", "aff_unique_index": "0+1;1;1;1;1", "aff_unique_norm": "Politecnico di Milano;NNAISENSE", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it;https://www.nnaisense.com", "aff_unique_abbr": "Polimi;NNAISENSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+1;1;1;1;1", "aff_country_unique": "Italy;Switzerland" }, { "title": "NEON2: Finding Local Minima via First-Order Oracles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11371", "id": "11371", "author_site": "Zeyuan Allen-Zhu, Yuanzhi Li", "author": "Zeyuan Allen-Zhu; Yuanzhi Li", "abstract": "We propose a reduction for non-convex optimization that can (1) turn a stationary-point finding algorithm into a local-minimum finding one, and (2) replace the Hessian-vector product computations with only gradient computations. It works both in the stochastic and the deterministic settings, without hurting the algorithm's performance.", "bibtex": "@inproceedings{NEURIPS2018_d4b2aeb2,\n author = {Allen-Zhu, Zeyuan and Li, Yuanzhi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {NEON2: Finding Local Minima via First-Order Oracles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d4b2aeb2453bdadaa45cbe9882ffefcf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d4b2aeb2453bdadaa45cbe9882ffefcf-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d4b2aeb2453bdadaa45cbe9882ffefcf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d4b2aeb2453bdadaa45cbe9882ffefcf-Reviews.html", "metareview": "", "pdf_size": 520591, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1843460514527877404&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Microsoft Research AI; Stanford University", "aff_domain": "csail.mit.edu;stanford.edu", "email": "csail.mit.edu;stanford.edu", "github": "", "project": "https://arxiv.org/abs/1711.06673", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d4b2aeb2453bdadaa45cbe9882ffefcf-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Stanford University", "aff_unique_dep": "AI;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.stanford.edu", "aff_unique_abbr": "MSR;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Natasha 2: Faster Non-Convex Optimization Than SGD", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11275", "id": "11275", "author": "Zeyuan Allen-Zhu", "abstract": "We design a stochastic algorithm to find $\\varepsilon$-approximate local minima of any smooth nonconvex function in rate $O(\\varepsilon^{-3.25})$, with only oracle access to stochastic gradients. The best result before this work was $O(\\varepsilon^{-4})$ by stochastic gradient descent (SGD).", "bibtex": "@inproceedings{NEURIPS2018_79a49b3e,\n author = {Allen-Zhu, Zeyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Natasha 2: Faster Non-Convex Optimization Than SGD},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/79a49b3e3762632813f9e35f4ba53d6c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/79a49b3e3762632813f9e35f4ba53d6c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/79a49b3e3762632813f9e35f4ba53d6c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/79a49b3e3762632813f9e35f4ba53d6c-Reviews.html", "metareview": "", "pdf_size": 937368, "gs_citation": 274, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8244627454979679380&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Microsoft Research AI", "aff_domain": "csail.mit.edu", "email": "csail.mit.edu", "github": "", "project": "https://arxiv.org/abs/1708.08694", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/79a49b3e3762632813f9e35f4ba53d6c-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "AI", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Navigating with Graph Representations for Fast and Scalable Decoding of Neural Language Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11611", "id": "11611", "author_site": "Minjia Zhang, Wenhan Wang, Xiaodong Liu, Jianfeng Gao, Yuxiong He", "author": "Minjia Zhang; Wenhan Wang; Xiaodong Liu; Jianfeng Gao; Yuxiong He", "abstract": "Neural language models (NLMs) have recently gained renewed interest by achieving state-of-the-art performance across many natural language processing (NLP) tasks. However, NLMs are very computationally demanding, largely due to the computational cost of the decoding process, which consists of a softmax layer over a large vocabulary. We observe that in the decoding of many NLP tasks, only the probabilities of the top-K hypotheses need to be calculated precisely, and K is often much smaller than the vocabulary size.\nThis paper proposes a novel softmax layer approximation algorithm, called Fast Graph Decoder (FGD), which quickly identifies, for a given context, a set of K words that are most likely to occur according to an NLM. We demonstrate that FGD reduces the decoding time by an order of magnitude while attaining close to the full softmax baseline accuracy on neural machine translation and language modeling tasks. We also prove a theoretical guarantee on the softmax approximation quality.", "bibtex": "@inproceedings{NEURIPS2018_366f0bc7,\n author = {Zhang, Minjia and Wang, Wenhan and Liu, Xiaodong and Gao, Jianfeng and He, Yuxiong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Navigating with Graph Representations for Fast and Scalable Decoding of Neural Language Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/366f0bc7bd1d4bf414073cabbadfdfcd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/366f0bc7bd1d4bf414073cabbadfdfcd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/366f0bc7bd1d4bf414073cabbadfdfcd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/366f0bc7bd1d4bf414073cabbadfdfcd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/366f0bc7bd1d4bf414073cabbadfdfcd-Reviews.html", "metareview": "", "pdf_size": 503397, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1867379037291537393&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Microsoft; Microsoft; Microsoft; Microsoft; Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/366f0bc7bd1d4bf414073cabbadfdfcd-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Near Optimal Exploration-Exploitation in Non-Communicating Markov Decision Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11305", "id": "11305", "author_site": "Ronan Fruit, Matteo Pirotta, Alessandro Lazaric", "author": "Ronan Fruit; Matteo Pirotta; Alessandro Lazaric", "abstract": "While designing the state space of an MDP, it is common to include states that are transient or not reachable by any policy (e.g., in mountain car, the product space of speed and position contains configurations that are not physically reachable). This results in weakly-communicating or multi-chain MDPs. In this paper, we introduce TUCRL, the first algorithm able to perform efficient exploration-exploitation in any finite Markov Decision Process (MDP) without requiring any form of prior knowledge. In particular, for any MDP with $S^c$ communicating states, $A$ actions and $\\Gamma^c \\leq S^c$ possible communicating next states, we derive an $O(D^c \\sqrt{\\Gamma^c S^c A T})$ regret bound, where $D^c$ is the diameter (i.e., the length of the longest shortest path between any two states) of the communicating part of the MDP. This is in contrast with optimistic algorithms (e.g., UCRL, Optimistic PSRL) that suffer linear regret in weakly-communicating MDPs, as well as posterior sampling or regularised algorithms (e.g., REGAL), which require prior knowledge on the bias span of the optimal policy to bias the exploration to achieve sub-linear regret. We also prove that in weakly-communicating MDPs, no algorithm can ever achieve a logarithmic growth of the regret without first suffering a linear regret for a number of steps that is exponential in the parameters of the MDP. 
Finally, we report numerical simulations supporting our theoretical findings and showing how TUCRL overcomes the limitations of the state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_3a20f62a,\n author = {Fruit, Ronan and Pirotta, Matteo and Lazaric, Alessandro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Near Optimal Exploration-Exploitation in Non-Communicating Markov Decision Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3a20f62a0af1aa152670bab3c602feed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3a20f62a0af1aa152670bab3c602feed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3a20f62a0af1aa152670bab3c602feed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3a20f62a0af1aa152670bab3c602feed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3a20f62a0af1aa152670bab3c602feed-Reviews.html", "metareview": "", "pdf_size": 745898, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6645061695976054329&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Sequel Team - Inria Lille; Sequel Team - Inria Lille; Facebook AI Research", "aff_domain": "inria.fr;inria.fr;fb.com", "email": "inria.fr;inria.fr;fb.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3a20f62a0af1aa152670bab3c602feed-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Meta", "aff_unique_dep": "Sequel Team;Facebook AI Research", "aff_unique_url": "https://www.inria.fr;https://research.facebook.com", "aff_unique_abbr": "Inria;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lille;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "France;United States" }, { "title": "Near-Optimal Policies for Dynamic Multinomial Logit Assortment Selection Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11315", "id": "11315", "author_site": "Yining Wang, Xi Chen, Yuan Zhou", "author": "Yining Wang; Xi Chen; Yuan Zhou", "abstract": "In this paper we consider the dynamic assortment selection problem under an uncapacitated multinomial-logit (MNL) model. By carefully analyzing a revenue potential function, we show that a trisection-based algorithm achieves an item-independent regret bound of $O(\\sqrt{T \\log \\log T})$, which matches information-theoretic lower bounds up to iterated logarithmic terms. Our proof technique draws tools from the unimodal/convex bandit literature as well as adaptive confidence parameters in minimax multi-armed bandit problems.", "bibtex": "@inproceedings{NEURIPS2018_d88518ac,\n author = {Wang, Yining and Chen, Xi and Zhou, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Near-Optimal Policies for Dynamic Multinomial Logit Assortment Selection Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d88518acbcc3d08d1f18da62f9bb26ec-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d88518acbcc3d08d1f18da62f9bb26ec-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d88518acbcc3d08d1f18da62f9bb26ec-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d88518acbcc3d08d1f18da62f9bb26ec-Reviews.html", "metareview": "", "pdf_size": 392736, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9858068107500119708&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Machine Learning Department, Carnegie Mellon University; Stern School of Business, New York University; Computer Science Department, Indiana University at Bloomington + Department of Industrial and Enterprise Systems Engineering, University of Illinois at Urbana-Champaign", "aff_domain": "cs.cmu.edu;stern.nyu.edu;illinois.edu", "email": "cs.cmu.edu;stern.nyu.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d88518acbcc3d08d1f18da62f9bb26ec-Abstract.html", "aff_unique_index": "0;1;2+3", "aff_unique_norm": "Carnegie Mellon University;New York University;Indiana University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Machine Learning Department;Stern School of Business;Computer Science Department;Department of Industrial and Enterprise Systems Engineering", "aff_unique_url": "https://www.cmu.edu;https://www.nyu.edu;https://www.indiana.edu;https://illinois.edu", "aff_unique_abbr": "CMU;NYU;IU;UIUC", "aff_campus_unique_index": "1;2+3", "aff_campus_unique": ";New York;Bloomington;Urbana-Champaign", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Near-Optimal Time and Sample Complexities for Solving Markov Decision Processes with a Generative Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11507", "id": "11507", "author_site": "Aaron Sidford, Mengdi Wang, Xian Wu, Lin Yang, Yinyu Ye", "author": "Aaron Sidford; Mengdi Wang; Xian Wu; Lin Yang; Yinyu Ye", "abstract": "In this paper we consider the problem of computing an $\\epsilon$-optimal policy of a discounted Markov Decision Process (DMDP) provided we can only access its transition function through a generative sampling model that, given any state-action pair, samples from the transition function in $O(1)$ time. Given such a DMDP with states $\\mathcal{S}$, actions $\\mathcal{A}$, discount factor $\\gamma\\in(0,1)$, and rewards in range $[0, 1]$, we provide an algorithm which computes an $\\epsilon$-optimal policy with probability $1 - \\delta$ where {\\it both} the run time spent and the number of samples taken are upper bounded by \n\\[\nO\\left[\\frac{|\\mathcal{S}||\\mathcal{A}|}{(1-\\gamma)^3 \\epsilon^2} \\log \\left(\\frac{|\\mathcal{S}||\\mathcal{A}|}{(1-\\gamma)\\delta \\epsilon}\\right) \\log\\left(\\frac{1}{(1-\\gamma)\\epsilon}\\right)\\right] ~.\n\\]\nFor fixed values of $\\epsilon$, this improves upon the previous best known bounds by a factor of $(1 - \\gamma)^{-1}$ and matches the sample complexity lower bounds proved in \\cite{azar2013minimax} up to logarithmic factors. 
\nWe also extend our method to computing $\\epsilon$-optimal policies for finite-horizon MDP with a generative model and provide a nearly matching sample complexity lower bound.", "bibtex": "@inproceedings{NEURIPS2018_bb03e43f,\n author = {Sidford, Aaron and Wang, Mengdi and Wu, Xian and Yang, Lin and Ye, Yinyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Near-Optimal Time and Sample Complexities for Solving Markov Decision Processes with a Generative Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bb03e43ffe34eeb242a2ee4a4f125e56-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bb03e43ffe34eeb242a2ee4a4f125e56-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/bb03e43ffe34eeb242a2ee4a4f125e56-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bb03e43ffe34eeb242a2ee4a4f125e56-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bb03e43ffe34eeb242a2ee4a4f125e56-Reviews.html", "metareview": "", "pdf_size": 387644, "gs_citation": 269, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9836959719663082985&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Stanford University; Princeton University; Stanford University; Princeton University; Stanford University", "aff_domain": "stanford.edu;princeton.edu;stanford.edu;princeton.edu;stanford.edu", "email": "stanford.edu;princeton.edu;stanford.edu;princeton.edu;stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bb03e43ffe34eeb242a2ee4a4f125e56-Abstract.html", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Stanford University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.princeton.edu", "aff_unique_abbr": "Stanford;Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Nearly tight sample complexity bounds for learning mixtures of Gaussians via sample compression schemes", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11343", "id": "11343", "author_site": "Hassan Ashtiani, Shai Ben-David, Nicholas Harvey, Christopher Liaw, Abbas Mehrabian, Yaniv Plan", "author": "Hassan Ashtiani; Shai Ben-David; Nicholas Harvey; Christopher Liaw; Abbas Mehrabian; Yaniv Plan", "abstract": "We prove that \u03f4(k d^2 / \u03b5^2) samples are necessary and sufficient for learning a mixture of k Gaussians in R^d, up to error \u03b5 in total variation distance. This improves both the known upper bounds and lower bounds for this problem. For mixtures of axis-aligned Gaussians, we show that O(k d / \u03b5^2) samples suffice, matching a known lower bound.", "bibtex": "@inproceedings{NEURIPS2018_70ece1e1,\n author = {Ashtiani, Hassan and Ben-David, Shai and Harvey, Nicholas and Liaw, Christopher and Mehrabian, Abbas and Plan, Yaniv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nearly tight sample complexity bounds for learning mixtures of Gaussians via sample compression schemes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/70ece1e1e0931919438fcfc6bd5f199c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/70ece1e1e0931919438fcfc6bd5f199c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/70ece1e1e0931919438fcfc6bd5f199c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/70ece1e1e0931919438fcfc6bd5f199c-Reviews.html", "metareview": "", "pdf_size": 303030, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3574852428355491801&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computing and Software, McMaster University + Vector Institute, ON, Canada; School of Computer Science, University of Waterloo, Waterloo, ON, Canada; Department of Computer Science, University of British Columbia, Vancouver, BC, Canada; Department of Computer Science, University of British Columbia, Vancouver, BC, Canada; School of Computer Science, McGill University, Montr\u00e9al, QC, Canada; Department of Mathematics, University of British Columbia, Vancouver, BC, Canada", "aff_domain": "mcmaster.ca;uwaterloo.ca;cs.ubc.ca;cs.ubc.ca;gmail.com;math.ubc.ca", "email": "mcmaster.ca;uwaterloo.ca;cs.ubc.ca;cs.ubc.ca;gmail.com;math.ubc.ca", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/70ece1e1e0931919438fcfc6bd5f199c-Abstract.html", "aff_unique_index": "0+1;2;3;3;4;3", "aff_unique_norm": "McMaster University;Vector Institute;University of Waterloo;University of British Columbia;McGill University", "aff_unique_dep": "Department of Computing and Software;;School of Computer Science;Department of Computer Science;School of Computer Science", "aff_unique_url": "https://www.mcmaster.ca;https://vectorinstitute.ai/;https://uwaterloo.ca;https://www.ubc.ca;https://www.mcgill.ca", "aff_unique_abbr": "McMaster;Vector Institute;UW;UBC;McGill", "aff_campus_unique_index": ";1;2;2;3;2", "aff_campus_unique": ";Waterloo;Vancouver;Montr\u00e9al", "aff_country_unique_index": "0+0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Negotiable Reinforcement Learning for Pareto Optimal Sequential Decision-Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11463", "id": "11463", "author_site": "Nishant Desai, Andrew Critch, Stuart J Russell", "author": "Nishant Desai; Andrew Critch; Stuart Russell", "abstract": "It is commonly believed that an agent making decisions on behalf of two or more principals who have different utility functions should adopt a Pareto optimal policy, i.e. a policy that cannot be improved upon for one principal without making sacrifices for another. Harsanyi's theorem shows that when the principals have a common prior on the outcome distributions of all policies, a Pareto optimal policy for the agent is one that maximizes a fixed, weighted linear combination of the principals\u2019 utilities. In this paper, we derive a more precise generalization for the sequential decision setting in the case of principals with different priors on the dynamics of the environment. We refer to this generalization as the Negotiable Reinforcement Learning (NRL) framework. 
In this more general case, the relative weight given to each principal\u2019s utility should evolve over time according to how well the agent\u2019s observations conform with that principal\u2019s prior. To gain insight into the dynamics of this new framework, we implement a simple NRL agent and empirically examine its behavior in a simple environment.", "bibtex": "@inproceedings{NEURIPS2018_5b8e4fd3,\n author = {Desai, Nishant and Critch, Andrew and Russell, Stuart J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Negotiable Reinforcement Learning for Pareto Optimal Sequential Decision-Making},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5b8e4fd39d9786228649a8a8bec4e008-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5b8e4fd39d9786228649a8a8bec4e008-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5b8e4fd39d9786228649a8a8bec4e008-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5b8e4fd39d9786228649a8a8bec4e008-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5b8e4fd39d9786228649a8a8bec4e008-Reviews.html", "metareview": "", "pdf_size": 330517, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5068614029935164931&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Center for Human-Compatible AI, University of California, Berkeley; Department of EECS, University of California, Berkeley; Computer Science Division, University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;cs.berkeley.edu", "email": "berkeley.edu;berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5b8e4fd39d9786228649a8a8bec4e008-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Center for Human-Compatible AI", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Neighbourhood Consensus Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11179", "id": "11179", "author_site": "Ignacio Rocco, Mircea Cimpoi, Relja Arandjelovi\u0107, Akihiko Torii, Tomas Pajdla, Josef Sivic", "author": "Ignacio Rocco; Mircea Cimpoi; Relja Arandjelovi\u0107; Akihiko Torii; Tomas Pajdla; Josef Sivic", "abstract": "We address the problem of finding reliable dense correspondences between a pair of images. This is a challenging task due to strong appearance differences between the corresponding scene elements and ambiguities generated by repetitive patterns. The contributions of this work are threefold. First, inspired by the classic idea of disambiguating feature matches using semi-local constraints, we develop an end-to-end trainable convolutional neural network architecture that identifies sets of spatially consistent matches by analyzing neighbourhood consensus patterns in the 4D space of all possible correspondences between a pair of images without the need for a global geometric model. 
Second, we demonstrate that the model can be trained effectively from weak supervision in the form of matching and non-matching image pairs without the need for costly manual annotation of point-to-point correspondences.\nThird, we show that the proposed neighbourhood consensus network can be applied to a range of matching tasks, including both category- and instance-level matching, obtaining state-of-the-art results on the PF Pascal dataset and the InLoc indoor visual localization benchmark.", "bibtex": "@inproceedings{NEURIPS2018_8f7d807e,\n author = {Rocco, Ignacio and Cimpoi, Mircea and Arandjelovi\\'{c}, Relja and Torii, Akihiko and Pajdla, Tomas and Sivic, Josef},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neighbourhood Consensus Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8f7d807e1f53eff5f9efbe5cb81090fb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8f7d807e1f53eff5f9efbe5cb81090fb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8f7d807e1f53eff5f9efbe5cb81090fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8f7d807e1f53eff5f9efbe5cb81090fb-Reviews.html", "metareview": "", "pdf_size": 1321511, "gs_citation": 519, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15950751904193964998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Inria+WILLOW project, D\u00e9partement d\u2019informatique de l\u2019\u00c9cole normale sup\u00e9rieure, ENS/INRIA/CNRS UMR 8548, PSL Research University, Paris, France; CIIRC, CTU in Prague+CIIRC \u2013 Czech Institute of Informatics, Robotics and Cybernetics at the Czech Technical University in Prague, Czechia; DeepMind; Tokyo Institute of Technology; CIIRC, CTU in Prague+CIIRC \u2013 Czech Institute of Informatics, Robotics and Cybernetics at the Czech Technical University in Prague, Czechia; Inria+CIIRC, CTU in Prague+CIIRC \u2013 Czech Institute of Informatics, Robotics and Cybernetics at the Czech Technical University in Prague, Czechia", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8f7d807e1f53eff5f9efbe5cb81090fb-Abstract.html", "aff_unique_index": "0+1;2+2;3;4;2+2;0+2+2", "aff_unique_norm": "INRIA;\u00c9cole Normale Sup\u00e9rieure;Czech Technical University in Prague;DeepMind;Tokyo Institute of Technology", "aff_unique_dep": ";D\u00e9partement d\u2019informatique;CIIRC;;", "aff_unique_url": "https://www.inria.fr;https://www.ens.fr;https://www.ciirc.cvut.cz/;https://deepmind.com;https://www.titech.ac.jp", "aff_unique_abbr": "Inria;ENS;CIIRC;DeepMind;Titech", "aff_campus_unique_index": "1;2+2;2+2;2+2", "aff_campus_unique": ";Paris;Prague", "aff_country_unique_index": "0+0;1+2;3;4;1+2;0+1+2", "aff_country_unique": "France;Czech Republic;Czechia;United Kingdom;Japan" }, { "title": "Neural Architecture Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11750", "id": "11750", "author_site": "Renqian Luo, Fei Tian, Tao Qin, Enhong Chen, Tie-Yan Liu", "author": "Renqian Luo; Fei Tian; Tao Qin; Enhong Chen; Tie-Yan Liu", "abstract": "Automatic neural architecture design has shown its potential in discovering powerful neural 
network architectures. Existing methods, whether based on reinforcement learning or evolutionary algorithms (EA), conduct architecture search in a discrete space, which is highly inefficient. In this paper, we propose a simple and efficient method for automatic neural architecture design based on continuous optimization. We call this new approach neural architecture optimization (NAO). There are three key components in our proposed approach: (1) An encoder embeds/maps neural network architectures into a continuous space. (2) A predictor takes the continuous representation of a network as input and predicts its accuracy. (3) A decoder maps a continuous representation of a network back to its architecture. The performance predictor and the encoder enable us to perform gradient-based optimization in the continuous space to find the embedding of a new architecture with potentially better accuracy. Such a better embedding is then decoded to a network by the decoder. Experiments show that the architecture discovered by our method is very competitive for the image classification task on CIFAR-10 and the language modeling task on PTB, outperforming or on par with the best results of previous architecture search methods, with a significant reduction in computational resources. Specifically, we obtain a $2.11\\%$ test set error rate on the CIFAR-10 image classification task and a test set perplexity of $56.0$ on the PTB language modeling task. The best discovered architectures on both tasks are successfully transferred to other tasks such as CIFAR-100 and WikiText-2. Furthermore, combined with the recently proposed weight sharing mechanism, we discover powerful architectures on CIFAR-10 (with error rate $3.53\\%$) and on PTB (with test set perplexity $56.6$), with very limited computational resources (less than $10$ GPU hours) for both tasks.", "bibtex": "@inproceedings{NEURIPS2018_933670f1,\n author = {Luo, Renqian and Tian, Fei and Qin, Tao and Chen, Enhong and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Architecture Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/933670f1ac8ba969f32989c312faba75-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/933670f1ac8ba969f32989c312faba75-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/933670f1ac8ba969f32989c312faba75-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/933670f1ac8ba969f32989c312faba75-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/933670f1ac8ba969f32989c312faba75-Reviews.html", "metareview": "", "pdf_size": 1009091, "gs_citation": 844, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14328532131321013615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "University of Science and Technology of China; Microsoft Research + University of Science and Technology of China; Microsoft Research; University of Science and Technology of China; Microsoft Research", "aff_domain": "mail.ustc.edu.cn;microsoft.com;microsoft.com;ustc.edu.cn;microsoft.com", "email": "mail.ustc.edu.cn;microsoft.com;microsoft.com;ustc.edu.cn;microsoft.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/933670f1ac8ba969f32989c312faba75-Abstract.html", "aff_unique_index": "0;1+0;1;0;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "USTC;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Neural Architecture Search with Bayesian Optimisation and Optimal Transport", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11214", "id": "11214", "author_site": "Kirthevasan Kandasamy, Willie Neiswanger, Jeff Schneider, Barnabas Poczos, Eric Xing", "author": "Kirthevasan Kandasamy; Willie Neiswanger; Jeff Schneider; Barnabas Poczos; Eric P Xing", "abstract": "Bayesian Optimisation (BO) refers to a class of methods for global optimisation\nof a function f which is only accessible via point evaluations. It is\ntypically used in settings where f is expensive to evaluate. A common use case\nfor BO in machine learning is model selection, where it is not possible to\nanalytically model the generalisation performance of a statistical model, and\nwe resort to noisy and expensive training and validation procedures to choose\nthe best model. Conventional BO methods have focused on Euclidean and\ncategorical domains, which, in the context of model selection, only permits\ntuning scalar hyper-parameters of machine learning algorithms. However, with\nthe surge of interest in deep learning, there is an increasing demand to tune\nneural network architectures. In this work, we develop NASBOT, a Gaussian\nprocess based BO framework for neural architecture search. To accomplish this,\nwe develop a distance metric in the space of neural network architectures which\ncan be computed efficiently via an optimal transport program. This distance\nmight be of independent interest to the deep learning community as it may find\napplications outside of BO. 
We demonstrate that NASBOT outperforms other\nalternatives for architecture search in several cross validation based model\nselection tasks on multi-layer perceptrons and convolutional neural networks.", "bibtex": "@inproceedings{NEURIPS2018_f33ba15e,\n author = {Kandasamy, Kirthevasan and Neiswanger, Willie and Schneider, Jeff and Poczos, Barnabas and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Architecture Search with Bayesian Optimisation and Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f33ba15effa5c10e873bf3842afb46a6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f33ba15effa5c10e873bf3842afb46a6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f33ba15effa5c10e873bf3842afb46a6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f33ba15effa5c10e873bf3842afb46a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f33ba15effa5c10e873bf3842afb46a6-Reviews.html", "metareview": "", "pdf_size": 671077, "gs_citation": 779, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7308576573219301832&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f33ba15effa5c10e873bf3842afb46a6-Abstract.html" }, { "title": "Neural Arithmetic Logic Units", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11770", "id": "11770", "author_site": "Andrew Trask, Felix Hill, Scott Reed, Jack Rae, Chris Dyer, Phil Blunsom", "author": "Andrew Trask; Felix Hill; Scott E Reed; Jack Rae; Chris Dyer; Phil Blunsom", "abstract": "Neural networks can learn to represent and manipulate numerical information, but they seldom generalize well outside of the range of numerical values encountered during training. To encourage more systematic numerical extrapolation, we propose an architecture that represents numerical quantities as linear activations which are manipulated using primitive arithmetic operators, controlled by learned gates. We call this module a neural arithmetic logic unit (NALU), by analogy to the arithmetic logic unit in traditional processors. Experiments show that NALU-enhanced neural networks can learn to track time, perform arithmetic over images of numbers, translate numerical language into real-valued scalars, execute computer code, and count objects in images. In contrast to conventional architectures, we obtain substantially better generalization both inside and outside of the range of numerical values encountered during training, often extrapolating orders of magnitude beyond trained numerical ranges.", "bibtex": "@inproceedings{NEURIPS2018_0e64a7b0,\n author = {Trask, Andrew and Hill, Felix and Reed, Scott E and Rae, Jack and Dyer, Chris and Blunsom, Phil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Arithmetic Logic Units},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0e64a7b00c83e3d22ce6b3acf2c582b6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0e64a7b00c83e3d22ce6b3acf2c582b6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0e64a7b00c83e3d22ce6b3acf2c582b6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0e64a7b00c83e3d22ce6b3acf2c582b6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0e64a7b00c83e3d22ce6b3acf2c582b6-Reviews.html", "metareview": "", "pdf_size": 508446, "gs_citation": 266, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4093117503425539195&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "DeepMind\u2021University of Oxford; DeepMind\u2021University of Oxford; DeepMind\u266dUniversity College London; DeepMind\u266dUniversity College London; DeepMind\u2021University of Oxford; DeepMind\u2021University of Oxford", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0e64a7b00c83e3d22ce6b3acf2c582b6-Abstract.html", "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "DeepMind;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk", "aff_unique_abbr": "DeepMind;UCL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Neural Code Comprehension: A Learnable Representation of Code Semantics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11359", "id": "11359", "author_site": "Tal Ben-Nun, Alice Shoshana Jakobovits, Torsten Hoefler", "author": "Tal Ben-Nun; Alice Shoshana Jakobovits; Torsten Hoefler", "abstract": "With the recent success of embeddings in natural language processing, research has been conducted into applying similar methods to code analysis. Most works attempt to process the code directly or use a syntactic tree representation, treating it like sentences written in a natural language. However, none of the existing methods are sufficient to comprehend program semantics robustly, due to structural features such as function calls, branching, and interchangeable order of statements. In this paper, we propose a novel processing technique to learn code semantics, and apply it to a variety of program analysis tasks. In particular, we stipulate that a robust distributional hypothesis of code applies to both human- and machine-generated programs. Following this hypothesis, we define an embedding space, inst2vec, based on an Intermediate Representation (IR) of the code that is independent of the source programming language. We provide a novel definition of contextual flow for this IR, leveraging both the underlying data- and control-flow of the program. We then analyze the embeddings qualitatively using analogies and clustering, and evaluate the learned representation on three different high-level tasks. 
We show that even without fine-tuning, a single RNN architecture and fixed inst2vec embeddings outperform specialized approaches for performance prediction (compute device mapping, optimal thread coarsening); and algorithm classification from raw code (104 classes), where we set a new state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_17c3433f,\n author = {Ben-Nun, Tal and Jakobovits, Alice Shoshana and Hoefler, Torsten},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Code Comprehension: A Learnable Representation of Code Semantics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/17c3433fecc21b57000debdf7ad5c930-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/17c3433fecc21b57000debdf7ad5c930-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/17c3433fecc21b57000debdf7ad5c930-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/17c3433fecc21b57000debdf7ad5c930-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/17c3433fecc21b57000debdf7ad5c930-Reviews.html", "metareview": "", "pdf_size": 1834865, "gs_citation": 327, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9627019893956716634&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 23, "aff": "ETH Zurich; ETH Zurich; ETH Zurich", "aff_domain": "inf.ethz.ch;student.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;student.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/17c3433fecc21b57000debdf7ad5c930-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Neural Edit Operations for Biological Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11486", "id": "11486", "author_site": "Satoshi Koide, Keisuke Kawano, Takuro Kutsuna", "author": "Satoshi Koide; Keisuke Kawano; Takuro Kutsuna", "abstract": "The evolution of biological sequences, such as proteins or DNAs, is driven by the three basic edit operations: substitution, insertion, and deletion. Motivated by the recent progress of neural network models for biological tasks, we implement two neural network architectures that can treat such edit operations. The first proposal is the edit invariant neural networks, based on differentiable Needleman-Wunsch algorithms. The second is the use of deep CNNs with concatenations. Our analysis shows that CNNs can recognize star-free regular expressions, and that deeper CNNs can recognize more complex regular expressions including the insertion/deletion of characters. The experimental results for the protein secondary structure prediction task suggest the importance of insertion/deletion. 
The test accuracy on the widely used CB513 dataset is 71.5%, which is 1.2 points better than the current best result on non-ensemble models.", "bibtex": "@inproceedings{NEURIPS2018_d0921d44,\n author = {Koide, Satoshi and Kawano, Keisuke and Kutsuna, Takuro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Edit Operations for Biological Sequences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d0921d442ee91b896ad95059d13df618-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d0921d442ee91b896ad95059d13df618-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d0921d442ee91b896ad95059d13df618-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d0921d442ee91b896ad95059d13df618-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d0921d442ee91b896ad95059d13df618-Reviews.html", "metareview": "", "pdf_size": 582453, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4033184508762565894&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Toyota Central R&D Labs.; Toyota Central R&D Labs.; Toyota Central R&D Labs.", "aff_domain": "mosk.tytlabs.co.jp;mosk.tytlabs.co.jp;mosk.tytlabs.co.jp", "email": "mosk.tytlabs.co.jp;mosk.tytlabs.co.jp;mosk.tytlabs.co.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d0921d442ee91b896ad95059d13df618-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Toyota Central R&D Labs", "aff_unique_dep": "", "aff_unique_url": "https://www.toyota-global.com", "aff_unique_abbr": "Toyota R&D", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "title": "Neural Guided Constraint Logic Programming for Program Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11187", "id": "11187", "author_site": "Lisa Zhang, Gregory Rosenblatt, Ethan Fetaya, Renjie Liao, William Byrd, Matthew Might, Raquel Urtasun, Richard Zemel", "author": "Lisa Zhang; Gregory Rosenblatt; Ethan Fetaya; Renjie Liao; William Byrd; Matthew Might; Raquel Urtasun; Richard Zemel", "abstract": "Synthesizing programs using example input/outputs is a classic problem in artificial intelligence. We present a method for solving Programming By Example (PBE) problems by using a neural model to guide the search of a constraint logic programming system called miniKanren. Crucially, the neural model uses miniKanren's internal representation as input; miniKanren represents a PBE problem as recursive constraints imposed by the provided examples. We explore Recurrent Neural Network and Graph Neural Network models. We contribute a modified miniKanren, drivable by an external agent, available at https://github.com/xuexue/neuralkanren. 
We show that our neural-guided approach using constraints can synthesize programs faster in many cases, and importantly, can generalize to larger problems.", "bibtex": "@inproceedings{NEURIPS2018_67d16d00,\n author = {Zhang, Lisa and Rosenblatt, Gregory and Fetaya, Ethan and Liao, Renjie and Byrd, William and Might, Matthew and Urtasun, Raquel and Zemel, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Guided Constraint Logic Programming for Program Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/67d16d00201083a2b118dd5128dd6f59-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/67d16d00201083a2b118dd5128dd6f59-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/67d16d00201083a2b118dd5128dd6f59-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/67d16d00201083a2b118dd5128dd6f59-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/67d16d00201083a2b118dd5128dd6f59-Reviews.html", "metareview": "", "pdf_size": 398697, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5770275785272500195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Toronto + Vector Institute + Uber ATG; University of Alabama at Birmingham; University of Toronto + Vector Institute + Uber ATG; University of Toronto + Vector Institute + Uber ATG; University of Alabama at Birmingham; University of Alabama at Birmingham; University of Toronto + Vector Institute + Uber ATG; University of Toronto + Vector Institute", "aff_domain": "cs.toronto.edu;uab.edu;cs.toronto.edu;cs.toronto.edu;uab.edu;uab.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;uab.edu;cs.toronto.edu;cs.toronto.edu;uab.edu;uab.edu;cs.toronto.edu;cs.toronto.edu", "github": "https://github.com/xuexue/neuralkanren", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/67d16d00201083a2b118dd5128dd6f59-Abstract.html", "aff_unique_index": "0+1+2;3;0+1+2;0+1+2;3;3;0+1+2;0+1", "aff_unique_norm": "University of Toronto;Vector Institute;Uber;University of Alabama at Birmingham", "aff_unique_dep": ";;Advanced Technologies Group;", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/;https://www.uber.com;https://www.uab.edu", "aff_unique_abbr": "U of T;Vector Institute;Uber ATG;UAB", "aff_campus_unique_index": ";1;;;1;1;;", "aff_campus_unique": ";Birmingham", "aff_country_unique_index": "0+0+1;1;0+0+1;0+0+1;1;1;0+0+1;0+0", "aff_country_unique": "Canada;United States" }, { "title": "Neural Interaction Transparency (NIT): Disentangling Learned Interactions for Improved Interpretability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11565", "id": "11565", "author_site": "Michael Tsang, Hanpeng Liu, Sanjay Purushotham, Pavankumar Murali, Yan Liu", "author": "Michael Tsang; Hanpeng Liu; Sanjay Purushotham; Pavankumar Murali; Yan Liu", "abstract": "Neural networks are known to model statistical interactions, but they entangle the interactions at intermediate hidden layers for shared representation learning. 
We propose a framework, Neural Interaction Transparency (NIT), that disentangles the shared learning across different interactions to obtain their intrinsic lower-order and interpretable structure. This is done through a novel regularizer that directly penalizes interaction order. We show that disentangling interactions reduces a feedforward neural network to a generalized additive model with interactions, which can lead to transparent models that perform comparably to the state-of-the-art models. NIT is also flexible and efficient; it can learn generalized additive models with maximum $K$-order interactions by training only $O(1)$ models.", "bibtex": "@inproceedings{NEURIPS2018_74378afe,\n author = {Tsang, Michael and Liu, Hanpeng and Purushotham, Sanjay and Murali, Pavankumar and Liu, Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Interaction Transparency (NIT): Disentangling Learned Interactions for Improved Interpretability},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/74378afe5e8b20910cf1f939e57f0480-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/74378afe5e8b20910cf1f939e57f0480-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/74378afe5e8b20910cf1f939e57f0480-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/74378afe5e8b20910cf1f939e57f0480-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/74378afe5e8b20910cf1f939e57f0480-Reviews.html", "metareview": "", "pdf_size": 1111546, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8176820061305993707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "University of Southern California; University of Southern California; University of Southern California; IBM T.J. Watson Research Center; University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;us.ibm.com;usc.edu", "email": "usc.edu;usc.edu;usc.edu;us.ibm.com;usc.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/74378afe5e8b20910cf1f939e57f0480-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Southern California;IBM", "aff_unique_dep": ";Research Center", "aff_unique_url": "https://www.usc.edu;https://www.ibm.com/research/watson", "aff_unique_abbr": "USC;IBM", "aff_campus_unique_index": "0;0;0;1;0", "aff_campus_unique": "Los Angeles;T.J. Watson", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Neural Nearest Neighbors Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11128", "id": "11128", "author_site": "Tobias Pl\u00f6tz, Stefan Roth", "author": "Tobias Pl\u00f6tz; Stefan Roth", "abstract": "Non-local methods exploiting the self-similarity of natural signals have been well studied, for example in image analysis and restoration. Existing approaches, however, rely on k-nearest neighbors (KNN) matching in a fixed feature space. The main hurdle in optimizing this feature space w.r.t. application performance is the non-differentiability of the KNN selection rule. To overcome this, we propose a continuous deterministic relaxation of KNN selection that maintains differentiability w.r.t. 
pairwise distances, but retains the original KNN as the limit of a temperature parameter approaching zero. To exploit our relaxation, we propose the neural nearest neighbors block (N3 block), a novel non-local processing layer that leverages the principle of self-similarity and can be used as a building block in modern neural network architectures. We show its effectiveness for the set reasoning task of correspondence classification as well as for image restoration, including image denoising and single image super-resolution, where we outperform strong convolutional neural network (CNN) baselines and recent non-local models that rely on KNN selection in hand-chosen feature spaces.", "bibtex": "@inproceedings{NEURIPS2018_f0e52b27,\n author = {Pl\\\"{o}tz, Tobias and Roth, Stefan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Nearest Neighbors Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Reviews.html", "metareview": "", "pdf_size": 4295331, "gs_citation": 396, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11963067599142958734&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, TU Darmstadt; Department of Computer Science, TU Darmstadt", "aff_domain": ";", "email": ";", "github": "https://github.com/visinf/n3net/", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f0e52b27a7a5d6a1a87373dffa53dbe5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TU Darmstadt", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Neural Networks Trained to Solve Differential Equations Learn General Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11404", "id": "11404", "author_site": "Martin Magill, Faisal Qureshi, Hendrick de Haan", "author": "Martin Magill; Faisal Qureshi; Hendrick de Haan", "abstract": "We introduce a technique based on the singular vector canonical correlation analysis (SVCCA) for measuring the generality of neural network layers across a continuously-parametrized set of tasks. We illustrate this method by studying generality in neural networks trained to solve parametrized boundary value problems based on the Poisson partial differential equation. We find that the first hidden layers are general, and that they learn generalized coordinates over the input domain. Deeper layers are successively more specific. Next, we validate our method against an existing technique that measures layer generality using transfer learning experiments. 
We find excellent agreement between the two methods, and note that our method is much faster, particularly for continuously-parametrized problems. Finally, we also apply our method to networks trained on MNIST, and show it is consistent with, and complementary to, another study of intrinsic dimensionality.", "bibtex": "@inproceedings{NEURIPS2018_d7a84628,\n author = {Magill, Martin and Qureshi, Faisal and de Haan, Hendrick},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Networks Trained to Solve Differential Equations Learn General Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d7a84628c025d30f7b2c52c958767e76-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d7a84628c025d30f7b2c52c958767e76-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d7a84628c025d30f7b2c52c958767e76-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d7a84628c025d30f7b2c52c958767e76-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d7a84628c025d30f7b2c52c958767e76-Reviews.html", "metareview": "", "pdf_size": 501630, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1998416737324732814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "U. of Ontario Inst. of Tech.; U. of Ontario Inst. of Tech.; U. of Ontario Inst. of Tech.", "aff_domain": "uoit.net;uoit.ca;uoit.ca", "email": "uoit.net;uoit.ca;uoit.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d7a84628c025d30f7b2c52c958767e76-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Ontario Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.uoit.ca", "aff_unique_abbr": "UOIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Neural Ordinary Differential Equations", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11635", "id": "11635", "author_site": "Tian Qi Chen, Yulia Rubanova, Jesse Bettencourt, David Duvenaud", "author": "Ricky T. Q. Chen; Yulia Rubanova; Jesse Bettencourt; David K. Duvenaud", "abstract": "We introduce a new family of deep neural network models. Instead of specifying a discrete sequence of hidden layers, we parameterize the derivative of the hidden state using a neural network. The output of the network is computed using a black-box differential equation solver. These continuous-depth models have constant memory cost, adapt their evaluation strategy to each input, and can explicitly trade numerical precision for speed. We demonstrate these properties in continuous-depth residual networks and continuous-time latent variable models. We also construct continuous normalizing flows, a generative model that can train by maximum likelihood, without partitioning or ordering the data dimensions. For training, we show how to scalably backpropagate through any ODE solver, without access to its internal operations. This allows end-to-end training of ODEs within larger models.", "bibtex": "@inproceedings{NEURIPS2018_69386f6b,\n author = {Chen, Ricky T. Q. 
and Rubanova, Yulia and Bettencourt, Jesse and Duvenaud, David K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Ordinary Differential Equations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/69386f6bb1dfed68692a24c8686939b9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/69386f6bb1dfed68692a24c8686939b9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/69386f6bb1dfed68692a24c8686939b9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/69386f6bb1dfed68692a24c8686939b9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/69386f6bb1dfed68692a24c8686939b9-Reviews.html", "metareview": "", "pdf_size": 3578735, "gs_citation": 6824, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13748354740225969894&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 26, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/69386f6bb1dfed68692a24c8686939b9-Abstract.html" }, { "title": "Neural Proximal Gradient Descent for Compressive Imaging", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11910", "id": "11910", "author_site": "Morteza Mardani, Qingyun Sun, David Donoho, Vardan Papyan, Hatef Monajemi, Shreyas Vasanawala, John Pauly", "author": "Morteza Mardani; Qingyun Sun; David Donoho; Vardan Papyan; Hatef Monajemi; Shreyas Vasanawala; John Pauly", "abstract": "Recovering high-resolution images from limited sensory data typically leads to a serious ill-posed inverse problem, demanding inversion algorithms that effectively capture the prior information. Learning a good inverse mapping from training data faces severe challenges, including: (i) scarcity of training data; (ii) need for plausible reconstructions that are physically feasible; (iii) need for fast reconstruction, especially in real-time applications. We develop a successful system solving all these challenges, using as basic architecture the repetitive application of alternating proximal and data fidelity constraints. We learn a proximal map that works well with real images based on residual networks with recurrent blocks. Extensive experiments are carried out under different settings: (a) reconstructing abdominal MRI of pediatric patients from highly undersampled k-space data and (b) super-resolving natural face images. Our key findings include: 1. a recurrent ResNet with a single residual block (10-fold repetition) yields an effective proximal which accurately reveals MR image details. 2. Our architecture significantly outperforms conventional non-recurrent deep ResNets by 2dB SNR; it is also trained much more rapidly. 3. It outperforms state-of-the-art compressed-sensing Wavelet-based methods by 4dB SNR, with 100x speedups in reconstruction time.", "bibtex": "@inproceedings{NEURIPS2018_61d009da,\n author = {Mardani, Morteza and Sun, Qingyun and Donoho, David and Papyan, Vardan and Monajemi, Hatef and Vasanawala, Shreyas and Pauly, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Proximal Gradient Descent for Compressive Imaging},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/61d009da208a34ae155420e55f97abc7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/61d009da208a34ae155420e55f97abc7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/61d009da208a34ae155420e55f97abc7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/61d009da208a34ae155420e55f97abc7-Reviews.html", "metareview": "", "pdf_size": 1461992, "gs_citation": 175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3786755267132685313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Depts. of Electrical Eng.; Depts. of Mathematics; Depts. of Radiology; Depts. of Statistics; Depts. of Statistics; Depts. of Electrical Eng.; Depts. of Statistics", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/61d009da208a34ae155420e55f97abc7-Abstract.html", "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "Electrical Engineering Department;University Affiliation Not Specified", "aff_unique_dep": "Electrical Engineering;Department of Mathematics", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Neural Tangent Kernel: Convergence and Generalization in Neural Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11819", "id": "11819", "author_site": "Arthur Jacot-Guillarmod, Clement Hongler, Franck Gabriel", "author": "Arthur Jacot; Franck Gabriel; Clement Hongler", "abstract": "At initialization, artificial neural networks (ANNs) are equivalent to Gaussian processes in the infinite-width limit, thus connecting them to kernel methods. We prove that the evolution of an ANN during training can also be described by a kernel: during gradient descent on the parameters of an ANN, the network function (which maps input vectors to output vectors) follows the so-called kernel gradient associated with a new object, which we call the Neural Tangent Kernel (NTK). This kernel is central to describe the generalization features of ANNs. While the NTK is random at initialization and varies during training, in the infinite-width limit it converges to an explicit limiting kernel and stays constant during training. This makes it possible to study the training of ANNs in function space instead of parameter space. Convergence of the training can then be related to the positive-definiteness of the limiting NTK.", "bibtex": "@inproceedings{NEURIPS2018_5a4be1fa,\n author = {Jacot, Arthur and Gabriel, Franck and Hongler, Clement},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Tangent Kernel: Convergence and Generalization in Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Reviews.html", "metareview": "", "pdf_size": 421992, "gs_citation": 4205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15521977800069603597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; Imperial College London + \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne", "aff_domain": "netopera.net;gmail.com;gmail.com", "email": "netopera.net;gmail.com;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5a4be1fa34e62bb8a6ec6b91d2462f5a-Abstract.html", "aff_unique_index": "0;1+0;0", "aff_unique_norm": "EPFL;Imperial College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.imperial.ac.uk", "aff_unique_abbr": "EPFL;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Neural Voice Cloning with a Few Samples", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11950", "id": "11950", "author_site": "Sercan Arik, Jitong Chen, Kainan Peng, Wei Ping, Yanqi Zhou", "author": "Sercan Arik; Jitong Chen; Kainan Peng; Wei Ping; Yanqi Zhou", "abstract": "Voice cloning is a highly desired feature for personalized speech interfaces. We introduce a neural voice cloning system that learns to synthesize a person's voice from only a few audio samples. We study two approaches: speaker adaptation and speaker encoding. Speaker adaptation is based on fine-tuning a multi-speaker generative model. Speaker encoding is based on training a separate model to directly infer a new speaker embedding, which will be applied to a multi-speaker generative model. In terms of naturalness of the speech and similarity to the original speaker, both approaches can achieve good performance, even with a few cloning audios. While speaker adaptation can achieve slightly better naturalness and similarity, cloning time and required memory for the speaker encoding approach are significantly less, making it more favorable for low-resource deployment.", "bibtex": "@inproceedings{NEURIPS2018_4559912e,\n author = {Arik, Sercan and Chen, Jitong and Peng, Kainan and Ping, Wei and Zhou, Yanqi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Voice Cloning with a Few Samples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4559912e7a94a9c32b09d894f2bc3c82-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4559912e7a94a9c32b09d894f2bc3c82-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4559912e7a94a9c32b09d894f2bc3c82-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4559912e7a94a9c32b09d894f2bc3c82-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4559912e7a94a9c32b09d894f2bc3c82-Reviews.html", "metareview": "", "pdf_size": 417790, "gs_citation": 517, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15797784530218455622&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Baidu Research; Baidu Research; Baidu Research; Baidu Research; Baidu Research", "aff_domain": "baidu.com;baidu.com;baidu.com;baidu.com;baidu.com", "email": "baidu.com;baidu.com;baidu.com;baidu.com;baidu.com", "github": "", "project": "https://audiodemos.github.io", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4559912e7a94a9c32b09d894f2bc3c82-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Baidu", "aff_unique_dep": "Baidu Research", "aff_unique_url": "https://research.baidu.com", "aff_unique_abbr": "Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Neural-Symbolic VQA: Disentangling Reasoning from Vision and Language Understanding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11123", "id": "11123", "author_site": "Kexin Yi, Jiajun Wu, Chuang Gan, Antonio Torralba, Pushmeet Kohli, Josh Tenenbaum", "author": "Kexin Yi; Jiajun Wu; Chuang Gan; Antonio Torralba; Pushmeet Kohli; Josh Tenenbaum", "abstract": "We marry two powerful ideas: deep representation learning for visual recognition and language understanding, and symbolic program execution for reasoning. Our neural-symbolic visual question answering (NS-VQA) system first recovers a structural scene representation from the image and a program trace from the question. It then executes the program on the scene representation to obtain an answer. Incorporating symbolic structure as prior knowledge offers three unique advantages. First, executing programs on a symbolic space is more robust to long program traces; our model can solve complex reasoning tasks better, achieving an accuracy of 99.8% on the CLEVR dataset. Second, the model is more data- and memory-efficient: it performs well after learning on a small number of training data; it can also encode an image into a compact representation, requiring less storage than existing methods for offline question answering. Third, symbolic program execution offers full transparency to the reasoning process; we are thus able to interpret and diagnose each execution step.", "bibtex": "@inproceedings{NEURIPS2018_5e388103,\n author = {Yi, Kexin and Wu, Jiajun and Gan, Chuang and Torralba, Antonio and Kohli, Pushmeet and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural-Symbolic VQA: Disentangling Reasoning from Vision and Language Understanding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5e388103a391daabe3de1d76a6739ccd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5e388103a391daabe3de1d76a6739ccd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5e388103a391daabe3de1d76a6739ccd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5e388103a391daabe3de1d76a6739ccd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5e388103a391daabe3de1d76a6739ccd-Reviews.html", "metareview": "", "pdf_size": 1793749, "gs_citation": 776, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10941421330777237042&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Harvard University; MIT CSAIL; MIT-IBM Watson AI Lab; MIT CSAIL; DeepMind; MIT CSAIL", "aff_domain": "; ; ; ; ; ", "email": "; ; ; ; ; ", "github": "", "project": "http://nsvqa.csail.mit.edu", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5e388103a391daabe3de1d76a6739ccd-Abstract.html", "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology;DeepMind", "aff_unique_dep": ";Computer Science and Artificial Intelligence Laboratory;", "aff_unique_url": "https://www.harvard.edu;https://www.csail.mit.edu;https://deepmind.com", "aff_unique_abbr": "Harvard;MIT CSAIL;DeepMind", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "New Insight into Hybrid Stochastic Gradient Descent: Beyond With-Replacement Sampling and Convexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11141", "id": "11141", "author_site": "Pan Zhou, Xiaotong Yuan, Jiashi Feng", "author": "Pan Zhou; Xiaotong Yuan; Jiashi Feng", "abstract": "As an incremental-gradient algorithm, the hybrid stochastic gradient descent (HSGD) enjoys merits of both stochastic and full gradient methods for finite-sum minimization problem. However, the existing rate-of-convergence analysis for HSGD is made under with-replacement sampling (WRS) and is restricted to convex problems. It is not clear whether HSGD still carries these advantages under the common practice of without-replacement sampling (WoRS) for non-convex problems. In this paper, we affirmatively answer this open question by showing that under WoRS and for both convex and non-convex problems, it is still possible for HSGD (with constant step-size) to match full gradient descent in rate of convergence, while maintaining comparable sample-size-independent incremental first-order oracle complexity to stochastic gradient descent. For a special class of finite-sum problems with linear prediction models, our convergence results can be further improved in some cases. Extensive numerical results confirm our theoretical affirmation and demonstrate the favorable efficiency of WoRS-based HSGD.", "bibtex": "@inproceedings{NEURIPS2018_67e103b0,\n author = {Zhou, Pan and Yuan, Xiaotong and Feng, Jiashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {New Insight into Hybrid Stochastic Gradient Descent: Beyond With-Replacement Sampling and Convexity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/67e103b0761e60683e83c559be18d40c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/67e103b0761e60683e83c559be18d40c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/67e103b0761e60683e83c559be18d40c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/67e103b0761e60683e83c559be18d40c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/67e103b0761e60683e83c559be18d40c-Reviews.html", "metareview": "", "pdf_size": 564599, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2021174098367059248&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Learning & Vision Lab, National University of Singapore, Singapore; B-DAT Lab, Nanjing University of Information Science & Technology, Nanjing, China; Learning & Vision Lab, National University of Singapore, Singapore", "aff_domain": "u.nus.edu;nuist.edu.cn;nus.edu.sg", "email": "u.nus.edu;nuist.edu.cn;nus.edu.sg", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/67e103b0761e60683e83c559be18d40c-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "National University of Singapore;Nanjing University of Information Science & Technology", "aff_unique_dep": "Learning & Vision Lab;B-DAT Lab", "aff_unique_url": "https://www.nus.edu.sg;http://www.nuist.edu.cn", "aff_unique_abbr": "NUS;", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Singapore;Nanjing", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;China" }, { "title": "Non-Adversarial Mapping with VAEs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11724", "id": "11724", "author": "Yedid Hoshen", "abstract": "The study of cross-domain mapping without supervision has recently attracted much attention. Much of the recent progress was enabled by the use of adversarial training as well as cycle constraints. The practical difficulty of adversarial training motivates research into non-adversarial methods. In a recent paper, it was shown that cross-domain mapping is possible without the use of cycles or GANs. Although promising, this approach suffers from several drawbacks including costly inference and an optimization variable for every training example preventing the method from using large training sets. We present an alternative approach which is able to achieve non-adversarial mapping using a novel form of Variational Auto-Encoder. Our method is much faster at inference time, is able to leverage large datasets and has a simple interpretation.", "bibtex": "@inproceedings{NEURIPS2018_3db11d25,\n author = {Hoshen, Yedid},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-Adversarial Mapping with VAEs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3db11d259a9db7fb8965bdf25ec850b9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3db11d259a9db7fb8965bdf25ec850b9-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3db11d259a9db7fb8965bdf25ec850b9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3db11d259a9db7fb8965bdf25ec850b9-Reviews.html", "metareview": "", "pdf_size": 463115, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15050139546099694208&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Facebook AI Research", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3db11d259a9db7fb8965bdf25ec850b9-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook AI Research", "aff_unique_url": "https://research.facebook.com", "aff_unique_abbr": "FAIR", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Non-Ergodic Alternating Proximal Augmented Lagrangian Algorithms with Optimal Rates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11472", "id": "11472", "author": "Quoc Tran Dinh", "abstract": "We develop two new non-ergodic alternating proximal augmented Lagrangian algorithms (NEAPAL) to solve a class of nonsmooth constrained convex optimization problems. Our approach relies on a novel combination of the augmented Lagrangian framework, alternating/linearization scheme, Nesterov's acceleration techniques, and adaptive strategy for parameters. Our algorithms have several new features compared to existing methods. Firstly, they have a Nesterov's acceleration step on the primal variables compared to the dual one in several methods in the literature.\nSecondly, they achieve non-ergodic optimal convergence rates under standard assumptions, i.e. an $\\mathcal{O}\\left(\\frac{1}{k}\\right)$ rate without any smoothness or strong convexity-type assumption, or an $\\mathcal{O}\\left(\\frac{1}{k^2}\\right)$ rate under only semi-strong convexity, where $k$ is the iteration counter. \nThirdly, they preserve or have better per-iteration complexity compared to existing algorithms. Fourthly, they can be implemented in a parallel fashion.\nFinally, all the parameters are adaptively updated without heuristic tuning.\nWe verify our algorithms on different numerical examples and compare them with some state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_7e3b7a5b,\n author = {Tran Dinh, Quoc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-Ergodic Alternating Proximal Augmented Lagrangian Algorithms with Optimal Rates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Reviews.html", "metareview": "", "pdf_size": 1012481, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3272770194518292339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Statistics and Operations Research, University of North Carolina at Chapel Hill", "aff_domain": "email.unc.edu", "email": "email.unc.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7e3b7a5bafcb0fa8e8dfe3ea6aca9186-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University of North Carolina at Chapel Hill", "aff_unique_dep": "Department of Statistics and Operations Research", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill", "aff_campus_unique_index": "0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Non-Local Recurrent Network for Image Restoration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11181", "id": "11181", "author_site": "Ding Liu, Bihan Wen, Yuchen Fan, Chen Change Loy, Thomas Huang", "author": "Ding Liu; Bihan Wen; Yuchen Fan; Chen Change Loy; Thomas S. Huang", "abstract": "Many classic methods have shown non-local self-similarity in natural images to be an effective prior for image restoration. However, it remains unclear and challenging to make use of this intrinsic property via deep networks. In this paper, we propose a non-local recurrent network (NLRN) as the first attempt to incorporate non-local operations into a recurrent neural network (RNN) for image restoration. The main contributions of this work are: (1) Unlike existing methods that measure self-similarity in an isolated manner, the proposed non-local module can be flexibly integrated into existing deep networks for end-to-end training to capture deep feature correlation between each location and its neighborhood. (2) We fully employ the RNN structure for its parameter efficiency and allow deep feature correlation to be propagated along adjacent recurrent states. This new design boosts robustness against inaccurate correlation estimation due to severely degraded images. (3) We show that it is essential to maintain a confined neighborhood for computing deep feature correlation given degraded images. This is in contrast to existing practice that deploys the whole image. Extensive experiments on both image denoising and super-resolution tasks are conducted. 
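The non-local module described in the NLRN abstract above aggregates, at every position, features from a confined neighborhood weighted by feature correlations. A minimal NumPy sketch of that idea follows; the function name, the window radius `q`, and the use of raw features in place of learned embeddings are illustrative assumptions, not the paper's actual layer.

```python
import numpy as np

def nonlocal_aggregate(x, q=3):
    """Toy neighborhood-confined non-local operation on an (H, W, C) map.

    Each position attends to a (2q+1) x (2q+1) window around itself,
    weighting neighbors by a softmax over inner products with the center
    feature. Illustrative only: NLRN uses learned embeddings and recurrence.
    """
    H, W, C = x.shape
    out = np.zeros_like(x)
    for i in range(H):
        for j in range(W):
            i0, i1 = max(0, i - q), min(H, i + q + 1)
            j0, j1 = max(0, j - q), min(W, j + q + 1)
            nb = x[i0:i1, j0:j1].reshape(-1, C)   # confined neighborhood
            logits = nb @ x[i, j]                 # correlation with center
            w = np.exp(logits - logits.max())
            w /= w.sum()                          # softmax attention weights
            out[i, j] = w @ nb                    # weighted feature aggregation
    return out

feat = np.random.default_rng(0).normal(size=(16, 16, 8))
agg = nonlocal_aggregate(feat)  # same shape, neighborhood-mixed features
```

Confining attention to a window rather than the whole image mirrors the abstract's point (3) that a restricted neighborhood makes correlation estimates more robust on degraded images.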
Thanks to the recurrent non-local operations and correlation propagation, the proposed NLRN achieves superior results to state-of-the-art methods with many fewer parameters.", "bibtex": "@inproceedings{NEURIPS2018_fc49306d,\n author = {Liu, Ding and Wen, Bihan and Fan, Yuchen and Loy, Chen Change and Huang, Thomas S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-Local Recurrent Network for Image Restoration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fc49306d97602c8ed1be1dfbf0835ead-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fc49306d97602c8ed1be1dfbf0835ead-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fc49306d97602c8ed1be1dfbf0835ead-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fc49306d97602c8ed1be1dfbf0835ead-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fc49306d97602c8ed1be1dfbf0835ead-Reviews.html", "metareview": "", "pdf_size": 1325492, "gs_citation": 769, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17713021931965385894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University of Illinois at Urbana-Champaign; University of Illinois at Urbana-Champaign; University of Illinois at Urbana-Champaign; Nanyang Technological University; University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;ntu.edu.sg;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu;ntu.edu.sg;illinois.edu", "github": "https://github.com/Ding-Liu/NLRN", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fc49306d97602c8ed1be1dfbf0835ead-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.ntu.edu.sg", "aff_unique_abbr": "UIUC;NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "title": "Non-delusional Q-learning and value-iteration", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11944", "id": "11944", "author_site": "Tyler Lu, Dale Schuurmans, Craig Boutilier", "author": "Tyler Lu; Dale Schuurmans; Craig Boutilier", "abstract": "We identify a fundamental source of error in Q-learning and other forms of dynamic programming with function approximation. Delusional bias arises when the approximation architecture limits the class of expressible greedy policies. Since standard Q-updates make globally uncoordinated action choices with respect to the expressible policy class, inconsistent or even conflicting Q-value estimates can result, leading to pathological behaviour such as over/under-estimation, instability and even divergence. To solve this problem, we introduce a new notion of policy consistency and define a local backup process that ensures global consistency through the use of information sets---sets that record constraints on policies consistent with backed-up Q-values. 
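The delusional-bias phenomenon and the information-set remedy sketched in the abstract above can be made concrete with a toy: under linear Q-values, two greedy action choices may be jointly inexpressible by any single parameter vector. The feature table, the parameter grid, and the brute-force consistency check below are invented for illustration; the paper's algorithms are far more general than this sketch.

```python
import itertools
import numpy as np

# Toy setup (not the paper's algorithm): Q(s, a) = theta . phi(s, a).
# An "information set" is represented here by the candidate parameters
# consistent with every greedy action choice recorded during backups.
ACTIONS = ("a0", "a1")
PHI = {("s0", "a0"): np.array([1.0, 0.0]), ("s0", "a1"): np.array([0.0, 1.0]),
       ("s1", "a0"): np.array([1.0, 0.0]), ("s1", "a1"): np.array([0.0, 1.0])}
THETAS = [np.array(t) for t in itertools.product(np.linspace(-1, 1, 9), repeat=2)]

def information_set(choices):
    """Parameters under which every recorded (state, action) choice is greedy."""
    return [th for th in THETAS
            if all(th @ PHI[(s, a)] > max(th @ PHI[(s, b)] for b in ACTIONS if b != a)
                   for s, a in choices)]

print(len(information_set([("s0", "a1"), ("s1", "a1")])))  # consistent: non-empty
print(len(information_set([("s0", "a1"), ("s1", "a0")])))  # delusional: empty
```

An empty set signals that the recorded backups jointly assume a greedy policy the function class cannot realize, which is exactly the inconsistency the paper's policy-consistent backup rules out.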
We prove that both the model-based and model-free algorithms using this backup remove delusional bias, yielding the first known algorithms that guarantee optimal results under general conditions. These algorithms furthermore only require polynomially many information sets (from a potentially exponential support). Finally, we suggest other practical heuristics for value-iteration and Q-learning that attempt to reduce delusional bias.", "bibtex": "@inproceedings{NEURIPS2018_5fd0245f,\n author = {Lu, Tyler and Schuurmans, Dale and Boutilier, Craig},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-delusional Q-learning and value-iteration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5fd0245f6c9ddbdf3eff0f505975b6a7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5fd0245f6c9ddbdf3eff0f505975b6a7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5fd0245f6c9ddbdf3eff0f505975b6a7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5fd0245f6c9ddbdf3eff0f505975b6a7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5fd0245f6c9ddbdf3eff0f505975b6a7-Reviews.html", "metareview": "", "pdf_size": 498966, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6109849635690983409&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Google AI; Google AI; Google AI", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5fd0245f6c9ddbdf3eff0f505975b6a7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google AI", "aff_unique_url": "https://ai.google", "aff_unique_abbr": "Google AI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Non-metric Similarity Graphs for Maximum Inner Product Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11464", "id": "11464", "author_site": "Stanislav Morozov, Artem Babenko", "author": "Stanislav Morozov; Artem Babenko", "abstract": "In this paper we address the problem of Maximum Inner Product Search (MIPS) that is currently the computational bottleneck in a large number of machine learning applications. \nWhile being similar to the nearest neighbor search (NNS), the MIPS problem was shown to be more challenging, as the inner product is not a proper metric function. We propose to solve the MIPS problem with the usage of similarity graphs, i.e., graphs where each vertex is connected to the vertices that are the most similar in terms of some similarity function. Originally, the framework of similarity graphs was proposed for metric spaces and in this paper we naturally extend it to the non-metric MIPS scenario. We demonstrate that, unlike existing approaches, similarity graphs do not require any data transformation to reduce MIPS to the NNS problem and should be used for the original data. Moreover, we explain why such a reduction is detrimental for similarity graphs. 
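The abstract above argues for building the similarity graph directly under the (non-metric) inner product and searching it greedily, with no reduction to nearest neighbor search. A brute-force toy sketch of that search is below; the graph construction, single-start greedy walk, and all names are simplifications invented here, not the authors' implementation.

```python
import numpy as np

def build_ip_graph(X, k=8):
    """k-NN graph under inner-product similarity (brute force, toy scale)."""
    sims = X @ X.T
    np.fill_diagonal(sims, -np.inf)           # no self loops
    return np.argsort(-sims, axis=1)[:, :k]   # neighbor ids per vertex

def greedy_mips(X, graph, q, start=0):
    """Walk to the neighbor with the largest inner product with the query
    until no neighbor improves; real systems add multi-start and beams."""
    cur = start
    while True:
        cand = graph[cur]
        best = cand[np.argmax(X[cand] @ q)]
        if X[best] @ q <= X[cur] @ q:
            return cur
        cur = best

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 32))
q = rng.normal(size=32)
print(greedy_mips(X, build_ip_graph(X), q), np.argmax(X @ q))
```

The two printed indices often agree on easy data, but the greedy walk carries no exactness guarantee; the point is only that the graph is built on inner products directly, without any norm-augmenting transformation.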
By an extensive comparison to the existing approaches, we show that the proposed method is a game-changer in terms of the runtime/accuracy trade-off for the MIPS problem.", "bibtex": "@inproceedings{NEURIPS2018_229754d7,\n author = {Morozov, Stanislav and Babenko, Artem},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-metric Similarity Graphs for Maximum Inner Product Search},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/229754d7799160502a143a72f6789927-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/229754d7799160502a143a72f6789927-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/229754d7799160502a143a72f6789927-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/229754d7799160502a143a72f6789927-Reviews.html", "metareview": "", "pdf_size": 542278, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7566476240574710197&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Yandex + Lomonosov Moscow State University; Yandex + National Research University Higher School of Economics", "aff_domain": "yandex.ru;phystech.edu", "email": "yandex.ru;phystech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/229754d7799160502a143a72f6789927-Abstract.html", "aff_unique_index": "0+1;0+2", "aff_unique_norm": "Yandex;Lomonosov Moscow State University;National Research University Higher School of Economics", "aff_unique_dep": ";;", "aff_unique_url": "https://yandex.com;https://www.msu.ru;https://hse.ru", "aff_unique_abbr": "Yandex;MSU;HSE", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Moscow", "aff_country_unique_index": "0+0;0+0", "aff_country_unique": "Russian Federation" }, { "title": "Non-monotone Submodular Maximization in Exponentially Fewer Iterations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11245", "id": "11245", "author_site": "Eric Balkanski, Adam Breuer, Yaron Singer", "author": "Eric Balkanski; Adam Breuer; Yaron Singer", "abstract": "In this paper we consider parallelization for applications whose objective can be\nexpressed as maximizing a non-monotone submodular function under a cardinality constraint. Our main result is an algorithm whose approximation is arbitrarily close\nto 1/2e in O(log^2 n) adaptive rounds, where n is the size of the ground set. This is an exponential speedup in parallel running time over any previously studied algorithm for constrained non-monotone submodular maximization. Beyond its provable guarantees, the algorithm performs well in practice. Specifically, experiments on traffic monitoring and personalized data summarization applications show that the algorithm finds solutions whose values are competitive with state-of-the-art algorithms while running in exponentially fewer parallel iterations.", "bibtex": "@inproceedings{NEURIPS2018_a42a596f,\n author = {Balkanski, Eric and Breuer, Adam and Singer, Yaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Non-monotone Submodular Maximization in Exponentially Fewer Iterations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a42a596fc71e17828440030074d15e74-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a42a596fc71e17828440030074d15e74-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a42a596fc71e17828440030074d15e74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a42a596fc71e17828440030074d15e74-Reviews.html", "metareview": "", "pdf_size": 575061, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6421919153691462843&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Harvard University; Harvard University; Harvard University", "aff_domain": "g.harvard.edu;g.harvard.edu;seas.harvard.edu", "email": "g.harvard.edu;g.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a42a596fc71e17828440030074d15e74-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Nonlocal Neural Networks, Nonlocal Diffusion and Nonlocal Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11073", "id": "11073", "author_site": "Yunzhe Tao, Qi Sun, Qiang Du, Wei Liu", "author": "Yunzhe Tao; Qi Sun; Qiang Du; Wei Liu", "abstract": "Nonlocal neural networks have been proposed and shown to be effective in several computer vision tasks, where the nonlocal operations can directly capture long-range dependencies in the feature space. In this paper, we study the nature of diffusion and damping effect of nonlocal networks by doing spectrum analysis on the weight matrices of the well-trained networks, and then propose a new formulation of the nonlocal block. The new block not only learns the nonlocal interactions but also has stable dynamics, thus allowing deeper nonlocal structures. Moreover, we interpret our formulation from the general nonlocal modeling perspective, where we make connections between the proposed nonlocal network and other nonlocal models, such as nonlocal diffusion process and Markov jump process.", "bibtex": "@inproceedings{NEURIPS2018_d296c101,\n author = {Tao, Yunzhe and Sun, Qi and Du, Qiang and Liu, Wei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nonlocal Neural Networks, Nonlocal Diffusion and Nonlocal Modeling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d296c101daa88a51f6ca8cfc1ac79b50-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d296c101daa88a51f6ca8cfc1ac79b50-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d296c101daa88a51f6ca8cfc1ac79b50-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d296c101daa88a51f6ca8cfc1ac79b50-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d296c101daa88a51f6ca8cfc1ac79b50-Reviews.html", "metareview": "", "pdf_size": 363926, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12178175871383980311&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "School of Engineering and Applied Science, Columbia University, USA; BCSRC & USTC, Beijing, China; School of Engineering and Applied Science, Columbia University, USA; Tencent AI Lab, Shenzhen, China", "aff_domain": "columbia.edu;csrc.ac.cn;columbia.edu;columbia.edu", "email": "columbia.edu;csrc.ac.cn;columbia.edu;columbia.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d296c101daa88a51f6ca8cfc1ac79b50-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Columbia University;University of Science and Technology of China;Tencent", "aff_unique_dep": "School of Engineering and Applied Science;;AI Lab", "aff_unique_url": "https://www.columbia.edu;http://www.ustc.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "Columbia;USTC;Tencent AI Lab", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Shenzhen", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;China" }, { "title": "Nonparametric Bayesian Lomax delegate racing for survival analysis with competing risks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11490", "id": "11490", "author_site": "Quan Zhang, Mingyuan Zhou", "author": "Quan Zhang; Mingyuan Zhou", "abstract": "We propose Lomax delegate racing (LDR) to explicitly model the mechanism of survival under competing risks and to interpret how the covariates accelerate or decelerate the time to event. LDR explains non-monotonic covariate effects by racing a potentially infinite number of sub-risks, and consequently relaxes the ubiquitous proportional-hazards assumption which may be too restrictive. Moreover, LDR is naturally able to model not only censoring, but also missing event times or event types. For inference, we develop a Gibbs sampler under data augmentation for moderately sized data, along with a stochastic gradient descent maximum a posteriori inference algorithm for big data applications. Illustrative experiments are provided on both synthetic and real datasets, and comparison with various benchmark algorithms for survival analysis with competing risks demonstrates distinguished performance of LDR.", "bibtex": "@inproceedings{NEURIPS2018_0fe47339,\n author = {Zhang, Quan and Zhou, Mingyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nonparametric Bayesian Lomax delegate racing for survival analysis with competing risks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0fe473396242072e84af286632d3f0ff-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0fe473396242072e84af286632d3f0ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0fe473396242072e84af286632d3f0ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0fe473396242072e84af286632d3f0ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0fe473396242072e84af286632d3f0ff-Reviews.html", "metareview": "", "pdf_size": 485406, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8420900743499045396&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "McCombs School of Business, The University of Texas at Austin; McCombs School of Business, The University of Texas at Austin", "aff_domain": "mccombs.utexas.edu;mccombs.utexas.edu", "email": "mccombs.utexas.edu;mccombs.utexas.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0fe473396242072e84af286632d3f0ff-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "McCombs School of Business", "aff_unique_url": "https://www.mccombs.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Nonparametric Density Estimation under Adversarial Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12560", "id": "12560", "author_site": "Shashank Singh, Ananya Uppal, Boyue Li, Chun-Liang Li, Manzil Zaheer, Barnabas Poczos", "author": "Shashank Singh; Ananya Uppal; Boyue Li; Chun-Liang Li; Manzil Zaheer; Barnabas Poczos", "abstract": "We study minimax convergence rates of nonparametric density estimation under a large class of loss functions called ``adversarial losses'', which, besides classical L^p losses, includes maximum mean discrepancy (MMD), Wasserstein distance, and total variation distance. These losses are closely related to the losses encoded by discriminator networks in generative adversarial networks (GANs). In a general framework, we study how the choice of loss and the assumed smoothness of the underlying density together determine the minimax rate. We also discuss implications for training GANs based on deep ReLU networks, and more general connections to learning implicit generative models in a minimax statistical sense.", "bibtex": "@inproceedings{NEURIPS2018_4996dcc4,\n author = {Singh, Shashank and Uppal, Ananya and Li, Boyue and Li, Chun-Liang and Zaheer, Manzil and Poczos, Barnabas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nonparametric Density Estimation under Adversarial Losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4996dcc43b5be197b5887a4e60817b1c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4996dcc43b5be197b5887a4e60817b1c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4996dcc43b5be197b5887a4e60817b1c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4996dcc43b5be197b5887a4e60817b1c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4996dcc43b5be197b5887a4e60817b1c-Reviews.html", "metareview": "", "pdf_size": 400499, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=139101477783263708&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Machine Learning Department+Department of Statistics & Data Science; Department of Mathematical Sciences; Language Technologies Institute; Machine Learning Department; Machine Learning Department; Machine Learning Department", "aff_domain": "cs.cmu.edu; ; ; ; ; ", "email": "cs.cmu.edu; ; ; ; ; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4996dcc43b5be197b5887a4e60817b1c-Abstract.html", "aff_unique_index": "0+1;2;3;0;0;0", "aff_unique_norm": "Carnegie Mellon University;University Affiliation Not Specified;Department of Mathematical Sciences;Language Technologies Institute", "aff_unique_dep": "Machine Learning Department;Department of Statistics & Data Science;Mathematical Sciences;Language Technologies", "aff_unique_url": "https://www.cs.cmu.edu/ml;;;", "aff_unique_abbr": "CMU ML;;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Nonparametric learning from Bayesian models with randomized objective functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11219", "id": "11219", "author_site": "Simon Lyddon, Stephen Walker, Chris C Holmes", "author": "Simon Lyddon; Stephen Walker; Chris C Holmes", "abstract": "Bayesian learning is built on an assumption that the model space contains a true reflection of the data generating mechanism. This assumption is problematic, particularly in complex data environments. Here we present a Bayesian nonparametric approach to learning that makes use of statistical models, but does not assume that the model is true. Our approach has provably better properties than using a parametric model and admits a Monte Carlo sampling scheme that can afford massive scalability on modern computer architectures. The model-based aspect of learning is particularly attractive for regularizing nonparametric inference when the sample size is small, and also for correcting approximate approaches such as variational Bayes (VB). We demonstrate the approach on a number of examples including VB classifiers and Bayesian random forests.", "bibtex": "@inproceedings{NEURIPS2018_b4d168b4,\n author = {Lyddon, Simon and Walker, Stephen and Holmes, Chris C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nonparametric learning from Bayesian models with randomized objective functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b4d168b48157c623fbd095b4a565b5bb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b4d168b48157c623fbd095b4a565b5bb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b4d168b48157c623fbd095b4a565b5bb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b4d168b48157c623fbd095b4a565b5bb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b4d168b48157c623fbd095b4a565b5bb-Reviews.html", "metareview": "", "pdf_size": 534460, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10393170821311480664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Statistics, University of Oxford; Department of Mathematics, University of Texas at Austin; Department of Statistics, University of Oxford", "aff_domain": "stats.ox.ac.uk;math.utexas.edu;stats.ox.ac.uk", "email": "stats.ox.ac.uk;math.utexas.edu;stats.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b4d168b48157c623fbd095b4a565b5bb-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;University of Texas at Austin", "aff_unique_dep": "Department of Statistics;Department of Mathematics", "aff_unique_url": "https://www.ox.ac.uk;https://www.utexas.edu", "aff_unique_abbr": "Oxford;UT Austin", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Oxford;Austin", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Norm matters: efficient and accurate normalization schemes in deep networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11227", "id": "11227", "author_site": "Elad Hoffer, Ron Banner, Itay Golan, Daniel Soudry", "author": "Elad Hoffer; Ron Banner; Itay Golan; Daniel Soudry", "abstract": "Over the past few years, Batch-Normalization has been commonly used in deep networks, allowing faster training and high performance for a wide variety of applications. However, the reasons behind its merits remained unanswered, with several shortcomings that hindered its use for certain tasks. In this work, we present a novel view on the purpose and function of normalization methods and weight-decay, as tools to decouple weights' norm from the underlying optimized objective. This property highlights the connection between practices such as normalization, weight decay and learning-rate adjustments. We suggest several alternatives to the widely used $L^2$ batch-norm, using normalization in $L^1$ and $L^\\infty$ spaces that can substantially improve numerical stability in low-precision implementations as well as provide computational and memory benefits. We demonstrate that such methods enable the first batch-norm alternative to work for half-precision implementations. Finally, we suggest a modification to weight-normalization, which improves its performance on large-scale tasks.", "bibtex": "@inproceedings{NEURIPS2018_a0160709,\n author = {Hoffer, Elad and Banner, Ron and Golan, Itay and Soudry, Daniel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Norm matters: efficient and accurate normalization schemes in deep networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a0160709701140704575d499c997b6ca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a0160709701140704575d499c997b6ca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a0160709701140704575d499c997b6ca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a0160709701140704575d499c997b6ca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a0160709701140704575d499c997b6ca-Reviews.html", "metareview": "", "pdf_size": 454047, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12023191299459902610&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Technion - Israel Institute of Technology; Intel - Artificial Intelligence Products Group (AIPG); Technion - Israel Institute of Technology; Technion - Israel Institute of Technology", "aff_domain": "gmail.com;intel.com;gmail.com;gmail.com", "email": "gmail.com;intel.com;gmail.com;gmail.com", "github": "https://github.com/eladhoffer/norm_matters", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a0160709701140704575d499c997b6ca-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology;Intel", "aff_unique_dep": ";Artificial Intelligence Products Group (AIPG)", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.intel.com", "aff_unique_abbr": "Technion;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "Norm-Ranging LSH for Maximum Inner Product Search", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11301", "id": "11301", "author_site": "Xiao Yan, Jinfeng Li, Xinyan Dai, Hongzhi Chen, James Cheng", "author": "Xiao Yan; Jinfeng Li; Xinyan Dai; Hongzhi Chen; James Cheng", "abstract": "Neyshabur and Srebro proposed SIMPLE-LSH, which is the state-of-the-art hashing based algorithm for maximum inner product search (MIPS). We found that the performance of SIMPLE-LSH, in both theory and practice, suffers from long tails in the 2-norm distribution of real datasets. We propose NORM-RANGING LSH, which addresses the excessive normalization problem caused by long tails by partitioning a dataset into sub-datasets and building a hash index for each sub-dataset independently. We prove that NORM-RANGING LSH achieves lower query time complexity than SIMPLE-LSH under mild conditions. We also show that the idea of dataset partitioning can improve another hashing based MIPS algorithm. Experiments show that NORM-RANGING LSH probes many fewer items than SIMPLE-LSH at the same recall, thus significantly benefiting MIPS based applications.", "bibtex": "@inproceedings{NEURIPS2018_b60c5ab6,\n author = {Yan, Xiao and Li, Jinfeng and Dai, Xinyan and Chen, Hongzhi and Cheng, James},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Norm-Ranging LSH for Maximum Inner Product Search},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b60c5ab647a27045b462934977ccad9a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b60c5ab647a27045b462934977ccad9a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b60c5ab647a27045b462934977ccad9a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b60c5ab647a27045b462934977ccad9a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b60c5ab647a27045b462934977ccad9a-Reviews.html", "metareview": "", "pdf_size": 350367, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4956999863940081632&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, The Chinese University of Hong Kong; Department of Computer Science, The Chinese University of Hong Kong; Department of Computer Science, The Chinese University of Hong Kong; Department of Computer Science, The Chinese University of Hong Kong; Department of Computer Science, The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b60c5ab647a27045b462934977ccad9a-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Object-Oriented Dynamics Predictor", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11931", "id": "11931", "author_site": "Guangxiang Zhu, Zhiao Huang, Chongjie Zhang", "author": "Guangxiang Zhu; Zhiao Huang; Chongjie Zhang", "abstract": "Generalization has been one of the major challenges for learning dynamics models in model-based reinforcement learning. However, previous work on action-conditioned dynamics prediction focuses on learning the pixel-level motion and thus does not generalize well to novel environments with different object layouts. In this paper, we present a novel object-oriented framework, called object-oriented dynamics predictor (OODP), which decomposes the environment into objects and predicts the dynamics of objects conditioned on both actions and object-to-object relations. It is an end-to-end neural network and can be trained in an unsupervised manner. To enable the generalization ability of dynamics learning, we design a novel CNN-based relation mechanism that is class-specific (rather than object-specific) and exploits the locality principle. Empirical results show that OODP significantly outperforms previous methods in terms of generalization over novel environments with various object layouts. OODP is able to learn from very few environments and accurately predict dynamics in a large number of unseen environments. 
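As a loose illustration of the object-oriented idea in the OODP abstract above (predicting per-object dynamics from actions and class-level relations rather than raw pixels), here is a made-up toy. The function, the locality radius, and the per-class weights are hypothetical stand-ins for the paper's learned, CNN-based relation mechanism.

```python
import numpy as np

def predict_next_pos(agent_pos, action_vec, objects, class_weights):
    """Toy object-oriented step: objects maps class name -> (N, 2) positions;
    class_weights maps class name -> a scalar effect coefficient.

    The effect of each object class on motion is class-specific (one weight
    per class, not per instance) and local (only nearby objects matter)."""
    motion = action_vec.astype(float)
    for cls, pos in objects.items():
        rel = pos - agent_pos                           # object-to-object relations
        near = rel[np.linalg.norm(rel, axis=1) < 2.0]   # locality principle
        if len(near):
            motion += class_weights[cls] * near.mean(axis=0)
    return agent_pos + motion

walls = np.array([[1.0, 0.0], [5.0, 5.0]])  # only the first wall is nearby
print(predict_next_pos(np.zeros(2), np.array([1.0, 0.0]),
                       {"wall": walls}, {"wall": -0.5}))  # -> [0.5, 0.]
```

Keying the weights by class rather than by object instance is what lets such a predictor transfer to novel layouts: a new wall behaves like every wall seen in training.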
In addition, OODP learns semantically and visually interpretable dynamics models.", "bibtex": "@inproceedings{NEURIPS2018_713fd63d,\n author = {Zhu, Guangxiang and Huang, Zhiao and Zhang, Chongjie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Object-Oriented Dynamics Predictor},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/713fd63d76c8a57b16fc433fb4ae718a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/713fd63d76c8a57b16fc433fb4ae718a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/713fd63d76c8a57b16fc433fb4ae718a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/713fd63d76c8a57b16fc433fb4ae718a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/713fd63d76c8a57b16fc433fb4ae718a-Reviews.html", "metareview": "", "pdf_size": 1197468, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1811390955386289421&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Institute for Interdisciplinary Information Sciences, Tsinghua University, Beijing, China; Institute for Interdisciplinary Information Sciences, Tsinghua University, Beijing, China; Institute for Interdisciplinary Information Sciences, Tsinghua University, Beijing, China", "aff_domain": "outlook.com;mails.tsinghua.edu.cn;tsinghua.edu.cn", "email": "outlook.com;mails.tsinghua.edu.cn;tsinghua.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/713fd63d76c8a57b16fc433fb4ae718a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Institute for Interdisciplinary Information Sciences", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tsinghua", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Objective and efficient inference for couplings in neuronal networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11487", "id": "11487", "author_site": "Yu Terada, Tomoyuki Obuchi, Takuya Isomura, Yoshiyuki Kabashima", "author": "Yu Terada; Tomoyuki Obuchi; Takuya Isomura; Yoshiyuki Kabashima", "abstract": "Inferring directional couplings from the spike data of networks is desired in various scientific fields such as neuroscience. Here, we apply a recently proposed objective procedure to the spike data obtained from the Hodgkin-Huxley type models and in vitro neuronal networks cultured in a circular structure. As a result, we succeed in reconstructing synaptic connections accurately from the evoked activity as well as the spontaneous one. To obtain the results, we invent an analytic formula approximately implementing a method of screening relevant couplings. This significantly reduces the computational cost of the screening method employed in the proposed objective procedure, making it possible to treat large-size systems as in this study.", "bibtex": "@inproceedings{NEURIPS2018_03cf8717,\n author = {Terada, Yu and Obuchi, Tomoyuki and Isomura, Takuya and Kabashima, Yoshiyuki},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Objective and efficient inference for couplings in neuronal networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/03cf87174debaccd689c90c34577b82f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/03cf87174debaccd689c90c34577b82f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/03cf87174debaccd689c90c34577b82f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/03cf87174debaccd689c90c34577b82f-Reviews.html", "metareview": "", "pdf_size": 2399323, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2789923089679719073&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Laboratory for Neural Computation and Adaptation, RIKEN Center for Brain Science, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan + Department of Mathematical and Computer Science, Tokyo Institute of Technology, Tokyo 152-8550, Japan; Department of Mathematical and Computer Science, Tokyo Institute of Technology, Tokyo 152-8550, Japan; Laboratory for Neural Computation and Adaptation, RIKEN Center for Brain Science, 2-1 Hirosawa, Wako, Saitama 351-0198, Japan; Department of Mathematical and Computer Science, Tokyo Institute of Technology, Tokyo 152-8550, Japan", "aff_domain": "riken.jp;c.titech.ac.jp;riken.jp;c.titech.ac.jp", "email": "riken.jp;c.titech.ac.jp;riken.jp;c.titech.ac.jp", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/03cf87174debaccd689c90c34577b82f-Abstract.html", "aff_unique_index": "0+1;1;0;1", "aff_unique_norm": "RIKEN Center for Brain Science;Tokyo Institute of Technology", "aff_unique_dep": "Laboratory for Neural Computation and Adaptation;Department of Mathematical and Computer Science", "aff_unique_url": "https://www.riken.jp/en/;https://www.titech.ac.jp", "aff_unique_abbr": "RIKEN;Titech", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Occam's razor is insufficient to infer the preferences of irrational agents", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11546", "id": "11546", "author_site": "Stuart Armstrong, S\u00f6ren Mindermann", "author": "Stuart Armstrong; S\u00f6ren Mindermann", "abstract": "Inverse reinforcement learning (IRL) attempts to infer human rewards or preferences from observed behavior. Since human planning systematically deviates from rationality, several approaches have been tried to account for specific human shortcomings. 
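The impossibility claimed in the abstract above, that an observed policy cannot be uniquely decomposed into a planning algorithm and a reward function, has an extreme instance: a fully rational planner with reward R is observationally identical to a fully anti-rational planner with reward -R. A few lines verify this for a random reward table (the toy tabular setup here is ours, not the paper's formal construction):

```python
import numpy as np

rng = np.random.default_rng(1)
R = rng.normal(size=(4, 2))          # reward table: 4 states x 2 actions

def rational_planner(reward):        # maximizes reward
    return reward.argmax(axis=1)

def antirational_planner(reward):    # minimizes reward
    return reward.argmin(axis=1)

pi1 = rational_planner(R)
pi2 = antirational_planner(-R)       # opposite planner, negated reward
assert (pi1 == pi2).all()            # identical observed policy
print(pi1)
```

Both decompositions fit the observations exactly, so no amount of additional data breaks the tie; the paper's contribution is showing that a simplicity prior over decompositions does not break it either, which is why normative assumptions are needed.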
\nHowever, the general problem of inferring the reward function of an agent of unknown rationality has received little attention.\nUnlike the well-known ambiguity problems in IRL, this one is practically relevant but cannot be resolved by observing the agent's policy in enough environments.\nThis paper shows (1) that a No Free Lunch result implies it is impossible to uniquely decompose a policy into a planning algorithm and reward function, and (2) that even with a reasonable simplicity prior/Occam's razor on the set of decompositions, we cannot distinguish between the true decomposition and others that lead to high regret.\nTo address this, we need simple `normative' assumptions, which cannot be deduced exclusively from observations.", "bibtex": "@inproceedings{NEURIPS2018_d89a66c7,\n author = {Armstrong, Stuart and Mindermann, S\\\"{o}ren},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Occam\\textquotesingle s razor is insufficient to infer the preferences of irrational agents},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d89a66c7c80a29b1bdbab0f2a1a94af8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d89a66c7c80a29b1bdbab0f2a1a94af8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d89a66c7c80a29b1bdbab0f2a1a94af8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d89a66c7c80a29b1bdbab0f2a1a94af8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d89a66c7c80a29b1bdbab0f2a1a94af8-Reviews.html", "metareview": "", "pdf_size": 354253, "gs_citation": 127, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3607710266968019591&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Vector Institute+University of Toronto; Future of Humanity Institute+University of Oxford+Machine Intelligence Research Institute", "aff_domain": "gmail.com;philosophy.ox.ac.uk", "email": "gmail.com;philosophy.ox.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d89a66c7c80a29b1bdbab0f2a1a94af8-Abstract.html", "aff_unique_index": "0+1;2+3+4", "aff_unique_norm": "Vector Institute;University of Toronto;Future of Humanity Institute;University of Oxford;Machine Intelligence Research Institute", "aff_unique_dep": ";;;;", "aff_unique_url": "https://vectorinstitute.ai/;https://www.utoronto.ca;https://www.fhi.ox.ac.uk/;https://www.ox.ac.uk;https://www.mirilab.org", "aff_unique_abbr": "Vector Institute;U of T;FHI;Oxford;MIRI", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0+0;1+1+2", "aff_country_unique": "Canada;United Kingdom;United States" }, { "title": "On Binary Classification in Extreme Regions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11314", "id": "11314", "author_site": "Hamid Jalalzai, Stephan Cl\u00e9men\u00e7on, Anne Sabourin", "author": "Hamid JALALZAI; Stephan Cl\u00e9men\u00e7on; Anne Sabourin", "abstract": "In pattern recognition, a random label Y is to be predicted based upon observing a random vector X valued in $\\mathbb{R}^d$ with d>1 by means of a classification rule with minimum probability of error. 
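The training scheme the extreme-regions abstract below alludes to, fitting a classifier on "a fraction of the largest observations", can be mocked up in a few lines. Selecting the top-norm points and projecting them to the unit sphere as done here is a sketch of the setup under assumed synthetic data, not the authors' estimator or its guarantees.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

def fit_extreme_classifier(X, y, frac=0.1):
    """Fit on the angular part of the k largest observations (toy sketch)."""
    norms = np.linalg.norm(X, axis=1)
    k = max(1, int(frac * len(X)))
    extreme = np.argsort(-norms)[:k]              # the k largest observations
    angular = X[extreme] / norms[extreme, None]   # project extremes to the sphere
    return LogisticRegression().fit(angular, y[extreme])

rng = np.random.default_rng(0)
X = rng.standard_cauchy((2000, 3))                # heavy-tailed inputs
y = (X[:, 0] > 0).astype(int)                     # label tied to direction
clf = fit_extreme_classifier(X, y)                # trained only on extremes
```

Restricting the fit to the extreme fraction is the point: on heavy-tailed data an ordinary empirical risk minimizer barely sees these rare points, which is the failure mode the paper addresses.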
In a wide variety of applications, ranging from finance/insurance to environmental sciences through teletraffic data analysis for instance, extreme (i.e. very large) observations X are of crucial importance, while contributing in a negligible manner to the (empirical) error, simply because of their rarity. As a consequence, empirical risk minimizers generally perform very poorly in extreme regions. It is the purpose of this paper to develop a general framework for classification in the extremes. Precisely, under non-parametric heavy-tail assumptions for the class distributions, we prove that a natural and asymptotic notion of risk, accounting for predictive performance in extreme regions of the input space, can be defined and show that minimizers of an empirical version of a non-asymptotic approximant of this dedicated risk, based on a fraction of the largest observations, lead to classification rules with good generalization capacity, by means of maximal deviation inequalities in low probability regions. Beyond theoretical results, numerical experiments are presented in order to illustrate the relevance of the approach developed.", "bibtex": "@inproceedings{NEURIPS2018_0ebcc77d,\n author = {JALALZAI, Hamid and Cl\\'{e}men\\c{c}on, Stephan and Sabourin, Anne},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Binary Classification in Extreme Regions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0ebcc77dc72360d0eb8e9504c78d38bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0ebcc77dc72360d0eb8e9504c78d38bd-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0ebcc77dc72360d0eb8e9504c78d38bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0ebcc77dc72360d0eb8e9504c78d38bd-Reviews.html", "metareview": "", "pdf_size": 691701, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7307717703743155428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "LTCI Telecom ParisTech, Universit\u00e9 Paris-Saclay; LTCI Telecom ParisTech, Universit\u00e9 Paris-Saclay; LTCI Telecom ParisTech, Universit\u00e9 Paris-Saclay", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "email": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0ebcc77dc72360d0eb8e9504c78d38bd-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Telecom ParisTech", "aff_unique_dep": "LTCI", "aff_unique_url": "https://www.telecom-paris.fr", "aff_unique_abbr": "Telecom ParisTech", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Paris", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "On Controllable Sparse Alternatives to Softmax", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11621", "id": "11621", "author_site": "Anirban Laha, Saneem Ahmed Chemmengath, Priyanka Agrawal, Mitesh Khapra, Karthik Sankaranarayanan, Harish Ramaswamy", "author": "Anirban Laha; Saneem Ahmed Chemmengath; Priyanka Agrawal; Mitesh Khapra; Karthik Sankaranarayanan; Harish G. 
Ramaswamy", "abstract": "Converting an n-dimensional vector to a probability distribution over n objects is a commonly used component in many machine learning tasks like multiclass classification, multilabel classification, attention mechanisms etc. For this, several probability mapping functions have been proposed and employed in literature such as softmax, sum-normalization, spherical softmax, and sparsemax, but there is very little understanding in terms how they relate with each other. Further, none of the above formulations offer an explicit control over the degree of sparsity. To address this, we develop a unified framework that encompasses all these formulations as special cases. This framework ensures simple closed-form solutions and existence of sub-gradients suitable for learning via backpropagation. Within this framework, we propose two novel sparse formulations, sparsegen-lin and sparsehourglass, that seek to provide a control over the degree of desired sparsity. We further develop novel convex loss functions that help induce the behavior of aforementioned formulations in the multilabel classification setting, showing improved performance. We also demonstrate empirically that the proposed formulations, when used to compute attention weights, achieve better or comparable performance on standard seq2seq tasks like neural machine translation and abstractive summarization.", "bibtex": "@inproceedings{NEURIPS2018_6a4d5952,\n author = {Laha, Anirban and Chemmengath, Saneem Ahmed and Agrawal, Priyanka and Khapra, Mitesh and Sankaranarayanan, Karthik and Ramaswamy, Harish G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Controllable Sparse Alternatives to Softmax},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a4d5952d4c018a1c1af9fa590a10dda-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a4d5952d4c018a1c1af9fa590a10dda-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6a4d5952d4c018a1c1af9fa590a10dda-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a4d5952d4c018a1c1af9fa590a10dda-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a4d5952d4c018a1c1af9fa590a10dda-Reviews.html", "metareview": "", "pdf_size": 692643, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3504521471854604409&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "IBM Research; IBM Research; IBM Research; Robert Bosch Center for DS and AI, and Dept of CSE, IIT Madras; IBM Research; Robert Bosch Center for DS and AI, and Dept of CSE, IIT Madras", "aff_domain": "in.ibm.com;in.ibm.com; ;iitm.ac.in; ;iitm.ac.in", "email": "in.ibm.com;in.ibm.com; ;iitm.ac.in; ;iitm.ac.in", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a4d5952d4c018a1c1af9fa590a10dda-Abstract.html", "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "IBM;Indian Institute of Technology Madras", "aff_unique_dep": "IBM Research;Dept of CSE", "aff_unique_url": "https://www.ibm.com/research;https://www.iitm.ac.in", "aff_unique_abbr": "IBM;IIT Madras", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madras", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "United 
States;India" }, { "title": "On Coresets for Logistic Regression", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11634", "id": "11634", "author_site": "Alexander Munteanu, Chris Schwiegelshohn, Christian Sohler, David Woodruff", "author": "Alexander Munteanu; Chris Schwiegelshohn; Christian Sohler; David Woodruff", "abstract": "Coresets are one of the central methods to facilitate the analysis of large data. We continue a recent line of research applying the theory of coresets to logistic regression. First, we show the negative result that no strongly sublinear-sized coresets exist for logistic regression. To deal with intractable worst-case instances, we introduce a complexity measure $\mu(X)$, which quantifies the hardness of compressing a data set for logistic regression. $\mu(X)$ has an intuitive statistical interpretation that may be of independent interest. For data sets with bounded $\mu(X)$-complexity, we show that a novel sensitivity sampling scheme produces the first provably sublinear $(1\pm\epsilon)$-coreset. We illustrate the performance of our method by comparing to uniform sampling as well as to state-of-the-art methods in the area. The experiments are conducted on real-world benchmark data for logistic regression.", "bibtex": "@inproceedings{NEURIPS2018_63bfd6e8,\n author = {Munteanu, Alexander and Schwiegelshohn, Chris and Sohler, Christian and Woodruff, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Coresets for Logistic Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/63bfd6e8f26d1d3537f4c5038264ef36-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/63bfd6e8f26d1d3537f4c5038264ef36-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/63bfd6e8f26d1d3537f4c5038264ef36-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/63bfd6e8f26d1d3537f4c5038264ef36-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/63bfd6e8f26d1d3537f4c5038264ef36-Reviews.html", "metareview": "", "pdf_size": 570231, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18350190801394048918&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Computer Science, TU Dortmund University; Department of Computer Science, Sapienza University of Rome; Department of Computer Science, TU Dortmund University; Department of Computer Science, Carnegie Mellon University", "aff_domain": "tu-dortmund.de;diag.uniroma1.it;tu-dortmund.de;cs.cmu.edu", "email": "tu-dortmund.de;diag.uniroma1.it;tu-dortmund.de;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/63bfd6e8f26d1d3537f4c5038264ef36-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "TU Dortmund University;Sapienza University of Rome;Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.tu-dortmund.de;https://www.uniroma1.it;https://www.cmu.edu", "aff_unique_abbr": "TU Dortmund;Sapienza;CMU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Dortmund;Rome;", "aff_country_unique_index": "0;1;0;2", 
"aff_country_unique": "Germany;Italy;United States" }, { "title": "On Fast Leverage Score Sampling and Optimal Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11553", "id": "11553", "author_site": "Alessandro Rudi, Daniele Calandriello, Luigi Carratino, Lorenzo Rosasco", "author": "Alessandro Rudi; Daniele Calandriello; Luigi Carratino; Lorenzo Rosasco", "abstract": "Leverage score sampling provides an appealing way to perform approximate computations for large matrices. Indeed, it allows one to derive faithful approximations with a complexity adapted to the problem at hand. Yet, performing leverage score sampling is a challenge in its own right, requiring further approximations. In this paper, we study the problem of leverage score sampling for positive definite matrices defined by a kernel. Our contribution is twofold. First, we provide a novel algorithm for leverage score sampling; second, we exploit the proposed method in statistical learning by deriving a novel solver for kernel ridge regression. Our main technical contribution is showing that the proposed algorithms are currently the most efficient and accurate for these problems.", "bibtex": "@inproceedings{NEURIPS2018_56584778,\n author = {Rudi, Alessandro and Calandriello, Daniele and Carratino, Luigi and Rosasco, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Fast Leverage Score Sampling and Optimal Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/56584778d5a8ab88d6393cc4cd11e090-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/56584778d5a8ab88d6393cc4cd11e090-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/56584778d5a8ab88d6393cc4cd11e090-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/56584778d5a8ab88d6393cc4cd11e090-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/56584778d5a8ab88d6393cc4cd11e090-Reviews.html", "metareview": "", "pdf_size": 721155, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6173645811972804817&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "INRIA \u2013 Sierra team, ENS, Paris; LCSL \u2013 IIT & MIT, Genoa, Italy; University of Genoa, Genoa, Italy; University of Genoa, LCSL \u2013 IIT & MIT", "aff_domain": "inria.fr;iit.it; ; ", "email": "inria.fr;iit.it; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/56584778d5a8ab88d6393cc4cd11e090-Abstract.html", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "INRIA;Istituto Italiano di Tecnologia (IIT);University of Genoa", "aff_unique_dep": "Sierra team;LCSL;", "aff_unique_url": "https://www.inria.fr;https://www.iit.it;https://www.unige.it", "aff_unique_abbr": "INRIA;IIT;UniGe", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Paris;Genoa;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "France;Italy" }, { "title": "On GANs and GMMs", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11569", "id": "11569", "author_site": "Eitan Richardson, Yair Weiss", "author": "Eitan Richardson; Yair Weiss", "abstract": "A longstanding problem in machine learning is to find unsupervised 
methods that can learn the statistical structure of high-dimensional signals. In recent years, GANs have gained much attention as a possible solution to the problem, and in particular have shown the ability to generate remarkably realistic high-resolution sampled images. At the same time, many authors have pointed out that GANs may fail to model the full distribution (\"mode collapse\") and that using the learned models for anything other than generating samples may be very difficult.", "bibtex": "@inproceedings{NEURIPS2018_0172d289,\n author = {Richardson, Eitan and Weiss, Yair},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On GANs and GMMs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0172d289da48c48de8c5ebf3de9f7ee1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0172d289da48c48de8c5ebf3de9f7ee1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0172d289da48c48de8c5ebf3de9f7ee1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0172d289da48c48de8c5ebf3de9f7ee1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0172d289da48c48de8c5ebf3de9f7ee1-Reviews.html", "metareview": "", "pdf_size": 3105215, "gs_citation": 198, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=809414118731916677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "School of Computer Science and Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel; School of Computer Science and Engineering, The Hebrew University of Jerusalem, Jerusalem, Israel", "aff_domain": "cs.huji.ac.il;cs.huji.ac.il", "email": "cs.huji.ac.il;cs.huji.ac.il", "github": "https://github.com/eitanrich/gans-n-gmms", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0172d289da48c48de8c5ebf3de9f7ee1-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "School of Computer Science and Engineering", "aff_unique_url": "http://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "On Learning Intrinsic Rewards for Policy Gradient Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11457", "id": "11457", "author_site": "Zeyu Zheng, Junhyuk Oh, Satinder Singh", "author": "Zeyu Zheng; Junhyuk Oh; Satinder Singh", "abstract": "In many sequential decision making tasks, it is challenging to design reward functions that help an RL agent efficiently learn behavior that is considered good by the agent designer. A number of different formulations of the reward-design problem, or close variants thereof, have been proposed in the literature. In this paper, we build on the Optimal Rewards Framework of Singh et al. that defines the optimal intrinsic reward function as one that, when used by an RL agent, achieves behavior that optimizes the task-specifying or extrinsic reward function. Previous work in this framework has shown how good intrinsic reward functions can be learned for lookahead search based planning agents. 
Whether it is possible to learn intrinsic reward functions for learning agents remains an open problem. In this paper, we derive a novel algorithm for learning intrinsic rewards for policy-gradient based learning agents. We compare the performance of an augmented agent that uses our algorithm to provide additive intrinsic rewards to an A2C-based policy learner (for Atari games) and a PPO-based policy learner (for MuJoCo domains) with a baseline agent that uses the same policy learners but with only extrinsic rewards. Our results show improved performance on most but not all of the domains.", "bibtex": "@inproceedings{NEURIPS2018_51de85dd,\n author = {Zheng, Zeyu and Oh, Junhyuk and Singh, Satinder},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Learning Intrinsic Rewards for Policy Gradient Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/51de85ddd068f0bc787691d356176df9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/51de85ddd068f0bc787691d356176df9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/51de85ddd068f0bc787691d356176df9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/51de85ddd068f0bc787691d356176df9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/51de85ddd068f0bc787691d356176df9-Reviews.html", "metareview": "", "pdf_size": 1748165, "gs_citation": 239, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8658005357410230302&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Computer Science & Engineering, University of Michigan; Computer Science & Engineering, University of Michigan; Computer Science & Engineering, University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "email": "umich.edu;umich.edu;umich.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/51de85ddd068f0bc787691d356176df9-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Computer Science & Engineering", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Learning Markov Chains", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11087", "id": "11087", "author_site": "Yi Hao, Alon Orlitsky, Venkatadheeraj Pichapati", "author": "Yi Hao; Alon Orlitsky; Venkatadheeraj Pichapati", "abstract": "The problem of estimating an unknown discrete distribution from its samples is a fundamental tenet of statistical learning. Over the past decade, it has attracted significant research effort and has been solved for a variety of divergence measures. Surprisingly, an equally important problem, estimating an unknown Markov chain from its samples, is still far from understood. 
We consider two problems related to the min-max risk (expected loss) of estimating an unknown $k$-state Markov chain from its $n$ sequential samples: predicting the conditional distribution of the next sample with respect to the KL-divergence, and estimating the transition matrix with respect to a natural loss induced by KL or a more general $f$-divergence measure.", "bibtex": "@inproceedings{NEURIPS2018_d34ab169,\n author = {Hao, Yi and Orlitsky, Alon and Pichapati, Venkatadheeraj},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Learning Markov Chains},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d34ab169b70c9dcd35e62896010cd9ff-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d34ab169b70c9dcd35e62896010cd9ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d34ab169b70c9dcd35e62896010cd9ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d34ab169b70c9dcd35e62896010cd9ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d34ab169b70c9dcd35e62896010cd9ff-Reviews.html", "metareview": "", "pdf_size": 367768, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12924776916617119318&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Dept. of Electrical and Computer Engineering, University of California, San Diego; Dept. of Electrical and Computer Engineering, University of California, San Diego; Dept. of Electrical and Computer Engineering, University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "email": "ucsd.edu;ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d34ab169b70c9dcd35e62896010cd9ff-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Dept. of Electrical and Computer Engineering", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Markov Chain Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11939", "id": "11939", "author_site": "Tao Sun, Yuejiao Sun, Wotao Yin", "author": "Tao Sun; Yuejiao Sun; Wotao Yin", "abstract": "Stochastic gradient methods are the workhorse (algorithms) of large-scale optimization problems in machine learning, signal processing, and other computational sciences and engineering. This paper studies Markov chain gradient descent, a variant of stochastic gradient descent where the random samples are taken on the trajectory of a Markov chain. Existing results for this method assume convex objectives and a reversible Markov chain and thus have their limitations. We establish new non-ergodic convergence under wider step sizes, for nonconvex problems, and for non-reversible finite-state Markov chains. Nonconvexity makes our method applicable to broader problem classes. Non-reversible finite-state Markov chains, on the other hand, can mix substantially faster. To obtain these results, we introduce a new technique that varies the mixing levels of the Markov chains. 
The reported numerical results validate our contributions.", "bibtex": "@inproceedings{NEURIPS2018_1371bcce,\n author = {Sun, Tao and Sun, Yuejiao and Yin, Wotao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Markov Chain Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1371bccec2447b5aa6d96d2a540fb401-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1371bccec2447b5aa6d96d2a540fb401-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1371bccec2447b5aa6d96d2a540fb401-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1371bccec2447b5aa6d96d2a540fb401-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1371bccec2447b5aa6d96d2a540fb401-Reviews.html", "metareview": "", "pdf_size": 746237, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8900302487729907256&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "College of Computer, National University of Defense Technology; Department of Mathematics, University of California, Los Angeles; Department of Mathematics, University of California, Los Angeles", "aff_domain": "163.com;math.ucla.edu;math.ucla.edu", "email": "163.com;math.ucla.edu;math.ucla.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1371bccec2447b5aa6d96d2a540fb401-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "National University of Defense Technology;University of California, Los Angeles", "aff_unique_dep": "College of Computer;Department of Mathematics", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.ucla.edu", "aff_unique_abbr": "NUDT;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "title": "On Misinformation Containment in Online Social Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11059", "id": "11059", "author_site": "Amo Tong, Ding-Zhu Du, Weili Wu", "author": "Amo Tong; Ding-Zhu Du; Weili Wu", "abstract": "Widespread online misinformation can cause public panic and serious economic damage. The misinformation containment problem aims at limiting the spread of misinformation in online social networks by launching competing campaigns. Motivated by realistic scenarios, we present the first analysis of the misinformation containment problem for the case when an arbitrary number of cascades is allowed. This paper makes four contributions. First, we provide a formal model for multi-cascade diffusion and introduce an important concept called cascade priority. Second, we show that the misinformation containment problem cannot be approximated within a factor of $\Omega(2^{\log^{1-\epsilon}n^4})$ in polynomial time unless $NP \subseteq DTIME(n^{\mathrm{polylog}(n)})$. Third, we introduce several types of cascade priority that are frequently seen in real social networks. Finally, we design novel algorithms for solving the misinformation containment problem. 
The effectiveness of the proposed algorithms is supported by encouraging experimental results.", "bibtex": "@inproceedings{NEURIPS2018_9b04d152,\n author = {Tong, Amo and Du, Ding-Zhu and Wu, Weili},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Misinformation Containment in Online Social Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9b04d152845ec0a378394003c96da594-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9b04d152845ec0a378394003c96da594-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9b04d152845ec0a378394003c96da594-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9b04d152845ec0a378394003c96da594-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9b04d152845ec0a378394003c96da594-Reviews.html", "metareview": "", "pdf_size": 586194, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9584360203786977867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer and Information Sciences, University of Delaware; Department of Computer Science, University of Texas at Dallas; Department of Computer Science, University of Texas at Dallas", "aff_domain": "udel.edu;utdallas.edu;utdallas.edu", "email": "udel.edu;utdallas.edu;utdallas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9b04d152845ec0a378394003c96da594-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Delaware;University of Texas at Dallas", "aff_unique_dep": "Department of Computer and Information Sciences;Department of Computer Science", "aff_unique_url": "https://www.udel.edu;https://www.utdallas.edu", "aff_unique_abbr": "UD;UT Dallas", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Neuronal Capacity", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11742", "id": "11742", "author_site": "Pierre Baldi, Roman Vershynin", "author": "Pierre Baldi; Roman Vershynin", "abstract": "We define the capacity of a learning machine to be the logarithm of the number (or volume) of the functions it can implement. We review known results, and derive new results, estimating the capacity of several neuronal models: linear and polynomial threshold gates, linear and polynomial threshold gates with constrained weights (binary weights, positive weights), and ReLU neurons. We also derive capacity estimates and bounds for fully recurrent networks and layered feedforward networks.", "bibtex": "@inproceedings{NEURIPS2018_a292f1c5,\n author = {Baldi, Pierre and Vershynin, Roman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Neuronal Capacity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a292f1c5874b2be8395ffd75f313937f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a292f1c5874b2be8395ffd75f313937f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a292f1c5874b2be8395ffd75f313937f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a292f1c5874b2be8395ffd75f313937f-Reviews.html", "metareview": "", "pdf_size": 396857, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1941721068797105688&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, University of California, Irvine; Department of Mathematics, University of California, Irvine", "aff_domain": "uci.edu;uci.edu", "email": "uci.edu;uci.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a292f1c5874b2be8395ffd75f313937f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On Oracle-Efficient PAC RL with Rich Observations", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11158", "id": "11158", "author_site": "Christoph Dann, Nan Jiang, Akshay Krishnamurthy, Alekh Agarwal, John Langford, Robert Schapire", "author": "Christoph Dann; Nan Jiang; Akshay Krishnamurthy; Alekh Agarwal; John Langford; Robert E. Schapire", "abstract": "We study the computational tractability of PAC reinforcement learning with rich observations. We present new provably sample-efficient algorithms for environments with deterministic hidden state dynamics and stochastic rich observations. These methods operate in an oracle model of computation -- accessing policy and value function classes exclusively through standard optimization primitives -- and therefore represent computationally efficient alternatives to prior algorithms that require enumeration. With stochastic hidden state dynamics, we prove that the only known sample-efficient algorithm, OLIVE, cannot be implemented in the oracle model. We also present several examples that illustrate fundamental challenges of tractable PAC reinforcement learning in such general settings.", "bibtex": "@inproceedings{NEURIPS2018_5f0f5e5f,\n author = {Dann, Christoph and Jiang, Nan and Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John and Schapire, Robert E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Oracle-Efficient PAC RL with Rich Observations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5f0f5e5f33945135b874349cfbed4fb9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5f0f5e5f33945135b874349cfbed4fb9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5f0f5e5f33945135b874349cfbed4fb9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5f0f5e5f33945135b874349cfbed4fb9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5f0f5e5f33945135b874349cfbed4fb9-Reviews.html", "metareview": "", "pdf_size": 631477, "gs_citation": 140, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1452591852193660877&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Carnegie Mellon University; UIUC; Microsoft Research; Microsoft Research; Microsoft Research; Microsoft Research", "aff_domain": "cdann.net;illinois.edu;cs.umass.edu;microsoft.com;microsoft.com;microsoft.com", "email": "cdann.net;illinois.edu;cs.umass.edu;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5f0f5e5f33945135b874349cfbed4fb9-Abstract.html", "aff_unique_index": "0;1;2;2;2;2", "aff_unique_norm": "Carnegie Mellon University;University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://illinois.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;UIUC;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "On gradient regularizers for MMD GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11647", "id": "11647", "author_site": "Michael Arbel, Danica J. Sutherland, Miko\u0142aj Bi\u0144kowski, Arthur Gretton", "author": "Michael Arbel; Danica J. Sutherland; Miko\u0142aj Bi\u0144kowski; Arthur Gretton", "abstract": "We propose a principled method for gradient-based regularization of the critic of GAN-like models trained by adversarially optimizing the kernel of a Maximum Mean Discrepancy (MMD). We show that controlling the gradient of the critic is vital to having a sensible loss function, and devise a method to enforce exact, analytical gradient constraints at no additional cost compared to existing approximate techniques based on additive regularizers. The new loss function is provably continuous, and experiments show that it stabilizes and accelerates training, giving image generation models that outperform state-of-the-art methods on $160 \times 160$ CelebA and $64 \times 64$ unconditional ImageNet.", "bibtex": "@inproceedings{NEURIPS2018_07f75d91,\n author = {Arbel, Michael and Sutherland, Danica J. and Bi\\'{n}kowski, Miko\\l aj and Gretton, Arthur},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On gradient regularizers for MMD GANs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/07f75d9144912970de5a09f5a305e10c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/07f75d9144912970de5a09f5a305e10c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/07f75d9144912970de5a09f5a305e10c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/07f75d9144912970de5a09f5a305e10c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/07f75d9144912970de5a09f5a305e10c-Reviews.html", "metareview": "", "pdf_size": 2837952, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12044208657387141906&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Gatsby Computational Neuroscience Unit, University College London; Gatsby Computational Neuroscience Unit, University College London; Department of Mathematics, Imperial College London; Gatsby Computational Neuroscience Unit, University College London", "aff_domain": "gmail.com;djsutherland.ml;gmail.com;gmail.com", "email": "gmail.com;djsutherland.ml;gmail.com;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/07f75d9144912970de5a09f5a305e10c-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University College London;Imperial College London", "aff_unique_dep": "Gatsby Computational Neuroscience Unit;Department of Mathematics", "aff_unique_url": "https://www.ucl.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "UCL;Imperial", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "On preserving non-discrimination when combining expert advice", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11801", "id": "11801", "author_site": "Avrim Blum, Suriya Gunasekar, Thodoris Lykouris, Nati Srebro", "author": "Avrim Blum; Suriya Gunasekar; Thodoris Lykouris; Nati Srebro", "abstract": "We study the interplay between sequential decision making and avoiding discrimination against protected groups, when examples arrive online and do not follow distributional assumptions. We consider the most basic extension of classical online learning: Given a class of predictors that are individually non-discriminatory with respect to a particular metric, how can we combine them to perform as well as the best predictor, while preserving non-discrimination? Surprisingly, we show that this task is unachievable for the prevalent notion of \"equalized odds\" that requires equal false negative rates and equal false positive rates across groups. On the positive side, for another notion of non-discrimination, \"equalized error rates\", we show that running separate instances of the classical multiplicative weights algorithm for each group achieves this guarantee. Interestingly, even for this notion, we show that algorithms with stronger performance guarantees than multiplicative weights cannot preserve non-discrimination.", "bibtex": "@inproceedings{NEURIPS2018_2e855f94,\n author = {Blum, Avrim and Gunasekar, Suriya and Lykouris, Thodoris and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On preserving non-discrimination when combining expert advice},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2e855f9489df0712b4bd8ea9e2848c5a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2e855f9489df0712b4bd8ea9e2848c5a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2e855f9489df0712b4bd8ea9e2848c5a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2e855f9489df0712b4bd8ea9e2848c5a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2e855f9489df0712b4bd8ea9e2848c5a-Reviews.html", "metareview": "", "pdf_size": 298769, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11471879666379464933&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "TTI-Chicago; TTI-Chicago; Cornell University; TTI-Chicago", "aff_domain": "ttic.edu;ttic.edu;cs.cornell.edu;ttic.edu", "email": "ttic.edu;ttic.edu;cs.cornell.edu;ttic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2e855f9489df0712b4bd8ea9e2848c5a-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Toyota Technological Institute at Chicago;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tti-chicago.org;https://www.cornell.edu", "aff_unique_abbr": "TTI;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "On the Convergence and Robustness of Training GANs with Regularized Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11683", "id": "11683", "author_site": "Maziar Sanjabi, Jimmy Ba, Meisam Razaviyayn, Jason Lee", "author": "Maziar Sanjabi; Jimmy Ba; Meisam Razaviyayn; Jason Lee", "abstract": "Generative Adversarial Networks (GANs) are one of the most practical methods for learning data distributions. A popular GAN formulation is based on the use of Wasserstein distance as a metric between probability distributions. Unfortunately, minimizing the Wasserstein distance between the data distribution and the generative model distribution is a computationally challenging problem as its objective is non-convex, non-smooth, and even hard to compute. In this work, we show that obtaining gradient information of the smoothed Wasserstein GAN formulation, which is based on regularized Optimal Transport (OT), is computationally effortless and hence one can apply first-order optimization methods to minimize this objective. Consequently, we establish a theoretical convergence guarantee to stationarity for a proposed class of GAN optimization algorithms. Unlike the original non-smooth formulation, our algorithm only requires solving the discriminator to approximate optimality. We apply our method to learning MNIST digits as well as CIFAR-10 images. Our experiments show that our method is computationally efficient and generates images comparable to those of state-of-the-art algorithms given the same architecture and computational power.", "bibtex": "@inproceedings{NEURIPS2018_5a9d8bf5,\n author = {Sanjabi, Maziar and Ba, Jimmy and Razaviyayn, Meisam and Lee, Jason D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Convergence and Robustness of Training GANs with Regularized Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5a9d8bf5b7a4b35f3110dde8673bdda2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5a9d8bf5b7a4b35f3110dde8673bdda2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5a9d8bf5b7a4b35f3110dde8673bdda2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5a9d8bf5b7a4b35f3110dde8673bdda2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5a9d8bf5b7a4b35f3110dde8673bdda2-Reviews.html", "metareview": "", "pdf_size": 1666594, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14171214713517537366&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Southern California; University of Toronto; University of Southern California; University of Southern California", "aff_domain": "usc.edu;cs.toronto.edu;usc.edu;marshall.usc.edu", "email": "usc.edu;cs.toronto.edu;usc.edu;marshall.usc.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5a9d8bf5b7a4b35f3110dde8673bdda2-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Southern California;University of Toronto", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.utoronto.ca", "aff_unique_abbr": "USC;U of T", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "On the Dimensionality of Word Embedding", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11110", "id": "11110", "author_site": "Zi Yin, Yuanyuan Shen", "author": "Zi Yin; Yuanyuan Shen", "abstract": "In this paper, we provide a theoretical understanding of word embedding and its dimensionality. Motivated by the unitary-invariance of word embedding, we propose the Pairwise Inner Product (PIP) loss, a novel metric on the dissimilarity between word embeddings. Using techniques from matrix perturbation theory, we reveal a fundamental bias-variance trade-off in dimensionality selection for word embeddings. This bias-variance trade-off sheds light on many empirical observations which were previously unexplained, for example the existence of an optimal dimensionality. Moreover, new insights and discoveries, like when and how word embeddings are robust to over-fitting, are revealed. By optimizing over the bias-variance trade-off of the PIP loss, we can explicitly answer the open question of dimensionality selection for word embedding.", "bibtex": "@inproceedings{NEURIPS2018_b534ba68,\n author = {Yin, Zi and Shen, Yuanyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Dimensionality of Word Embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b534ba68236ba543ae44b22bd110a1d6-Reviews.html", "metareview": "", "pdf_size": 828674, "gs_citation": 301, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5195001394061384953&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Stanford University; Microsoft Corp. + Stanford University", "aff_domain": "gmail.com;microsoft.com", "email": "gmail.com;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b534ba68236ba543ae44b22bd110a1d6-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "Stanford University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://www.microsoft.com", "aff_unique_abbr": "Stanford;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United States" }, { "title": "On the Global Convergence of Gradient Descent for Over-parameterized Models using Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11309", "id": "11309", "author_site": "L\u00e9na\u00efc Chizat, Francis Bach", "author": "L\u00e9na\u00efc Chizat; Francis Bach", "abstract": "Many tasks in machine learning and signal processing can be solved by minimizing a convex function of a measure. This includes sparse spikes deconvolution or training a neural network with a single hidden layer. For these problems, we study a simple minimization method: the unknown measure is discretized into a mixture of particles and a continuous-time gradient descent is performed on their weights and positions. This is an idealization of the usual way to train neural networks with a large hidden layer. We show that, when initialized correctly and in the many-particle limit, this gradient flow, although non-convex, converges to global minimizers. The proof involves Wasserstein gradient flows, a by-product of optimal transport theory. Numerical experiments show that this asymptotic behavior is already at play for a reasonable number of particles, even in high dimension.", "bibtex": "@inproceedings{NEURIPS2018_a1afc58c,\n author = {Chizat, L\\'{e}na\\\"{\\i}c and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Global Convergence of Gradient Descent for Over-parameterized Models using Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a1afc58c6ca9540d057299ec3016d726-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a1afc58c6ca9540d057299ec3016d726-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a1afc58c6ca9540d057299ec3016d726-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a1afc58c6ca9540d057299ec3016d726-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a1afc58c6ca9540d057299ec3016d726-Reviews.html", "metareview": "", "pdf_size": 791799, "gs_citation": 961, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2184449021441865846&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "INRIA, ENS, PSL Research University; INRIA, ENS, PSL Research University", "aff_domain": "inria.fr;inria.fr", "email": "inria.fr;inria.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a1afc58c6ca9540d057299ec3016d726-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "On the Local Hessian in Back-propagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11630", "id": "11630", "author_site": "Huishuai Zhang, Wei Chen, Tie-Yan Liu", "author": "Huishuai Zhang; Wei Chen; Tie-Yan Liu", "abstract": "Back-propagation (BP) is the foundation for successfully training deep neural networks. However, BP sometimes has difficulty propagating a learning signal deeply and effectively, e.g., the vanishing gradient phenomenon. Meanwhile, BP often works well when combined with ``design tricks'' like orthogonal initialization, batch normalization and skip connections. There is no clear understanding of what is essential to the efficiency of BP. In this paper, we take one step towards clarifying this problem. We view BP as a solution of back-matching propagation, which minimizes a sequence of back-matching losses, each corresponding to one block of the network. We study the Hessian of the local back-matching loss (local Hessian) and connect it to the efficiency of BP. It turns out that those design tricks facilitate BP by improving the spectrum of the local Hessian. In addition, we can utilize the local Hessian to balance the training pace of each block and design new training algorithms. Based on a scalar approximation of the local Hessian, we propose a scale-amended SGD algorithm. We apply it to train neural networks with batch normalization, and achieve favorable results over vanilla SGD. This corroborates the importance of the local Hessian from another side.", "bibtex": "@inproceedings{NEURIPS2018_b6d67a24,\n author = {Zhang, Huishuai and Chen, Wei and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Local Hessian in Back-propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b6d67a24906e8a8541291882f81d31ca-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b6d67a24906e8a8541291882f81d31ca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b6d67a24906e8a8541291882f81d31ca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b6d67a24906e8a8541291882f81d31ca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b6d67a24906e8a8541291882f81d31ca-Reviews.html", "metareview": "", "pdf_size": 786128, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13649547267799644290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b6d67a24906e8a8541291882f81d31ca-Abstract.html" }, { "title": "On the Local Minima of the Empirical Risk", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11480", "id": "11480", "author_site": "Chi Jin, Lydia T. Liu, Rong Ge, Michael Jordan", "author": "Chi Jin; Lydia T. Liu; Rong Ge; Michael I Jordan", "abstract": "Population risk is always of primary interest in machine learning; however, learning algorithms only have access to the empirical risk. Even for applications with nonconvex non-smooth losses (such as modern deep networks), the population risk is generally significantly more well behaved from an optimization point of view than the empirical risk. In particular, sampling can create many spurious local minima. We consider a general framework which aims to optimize a smooth nonconvex function $F$ (population risk) given only access to an approximation $f$ (empirical risk) that is pointwise close to $F$ (i.e., $\|F-f\|_{\infty} \le \nu$). Our objective is to find the $\epsilon$-approximate local minima of the underlying function $F$ while avoiding the shallow local minima---arising because of the tolerance $\nu$---which exist only in $f$. We propose a simple algorithm based on stochastic gradient descent (SGD) on a smoothed version of $f$ that is guaranteed to achieve our goal as long as $\nu \le O(\epsilon^{1.5}/d)$. We also provide an almost matching lower bound showing that our algorithm achieves optimal error tolerance $\nu$ among all algorithms making a polynomial number of queries of $f$. As a concrete example, we show that our results can be directly used to give sample complexities for learning a ReLU unit.", "bibtex": "@inproceedings{NEURIPS2018_da4902cb,\n author = {Jin, Chi and Liu, Lydia T. and Ge, Rong and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Local Minima of the Empirical Risk},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/da4902cb0bc38210839714ebdcf0efc3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/da4902cb0bc38210839714ebdcf0efc3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/da4902cb0bc38210839714ebdcf0efc3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/da4902cb0bc38210839714ebdcf0efc3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/da4902cb0bc38210839714ebdcf0efc3-Reviews.html", "metareview": "", "pdf_size": 1388843, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17678335634062567222&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "University of California, Berkeley; University of California, Berkeley; Duke University; University of California, Berkeley", "aff_domain": "cs.berkeley.edu;cs.berkeley.edu;cs.duke.edu;cs.berkeley.edu", "email": "cs.berkeley.edu;cs.berkeley.edu;cs.duke.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/da4902cb0bc38210839714ebdcf0efc3-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Berkeley;Duke University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.duke.edu", "aff_unique_abbr": "UC Berkeley;Duke", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "One-Shot Unsupervised Cross Domain Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11222", "id": "11222", "author_site": "Sagie Benaim, Lior Wolf", "author": "Sagie Benaim; Lior Wolf", "abstract": "Given a single image $x$ from domain $A$ and a set of images from domain $B$, our task is to generate the analog of $x$ in $B$. We argue that this task could be a key AI capability that underlies the ability of cognitive agents to act in the world and present empirical evidence that the existing unsupervised domain translation methods fail on this task. Our method follows a two-step process. First, a variational autoencoder for domain $B$ is trained. Then, given the new sample $x$, we create a variational autoencoder for domain $A$ by adapting the layers that are close to the image in order to directly fit $x$, and only indirectly adapt the other layers. Our experiments indicate that the new method does as well, when trained on one sample $x$, as the existing domain transfer methods, when these enjoy a multitude of training samples from domain $A$. Our code is made publicly available at https://github.com/sagiebenaim/OneShotTranslation", "bibtex": "@inproceedings{NEURIPS2018_062ddb6c,\n author = {Benaim, Sagie and Wolf, Lior},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {One-Shot Unsupervised Cross Domain Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/062ddb6c727310e76b6200b7c71f63b5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/062ddb6c727310e76b6200b7c71f63b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/062ddb6c727310e76b6200b7c71f63b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/062ddb6c727310e76b6200b7c71f63b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/062ddb6c727310e76b6200b7c71f63b5-Reviews.html", "metareview": "", "pdf_size": 7828205, "gs_citation": 160, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16456724842379503316&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The School of Computer Science, Tel Aviv University, Israel; The School of Computer Science, Tel Aviv University, Israel + Facebook AI Research", "aff_domain": ";", "email": ";", "github": "https://github.com/sagiebenaim/OneShotTranslation", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/062ddb6c727310e76b6200b7c71f63b5-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Tel Aviv University;Meta", "aff_unique_dep": "School of Computer Science;Facebook AI Research", "aff_unique_url": "https://www.tau.ac.il;https://research.facebook.com", "aff_unique_abbr": "TAU;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Tel Aviv;", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Israel;United States" }, { "title": "Online Adaptive Methods, Universality and Acceleration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11628", "id": "11628", "author_site": "Kfir Y. Levy, Alp Yurtsever, Volkan Cevher", "author": "Kfir Y. Levy; Alp Yurtsever; Volkan Cevher", "abstract": "We present a novel method for convex unconstrained optimization that, without any modifications, ensures: (1) an accelerated convergence rate for smooth objectives, (2) the standard convergence rate in the general (non-smooth) setting, and (3) the standard convergence rate in the stochastic optimization setting.", "bibtex": "@inproceedings{NEURIPS2018_b0169350,\n author = {Levy, Kfir Y. and Yurtsever, Alp and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Adaptive Methods, Universality and Acceleration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b0169350cd35566c47ba83c6ec1d6f82-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b0169350cd35566c47ba83c6ec1d6f82-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b0169350cd35566c47ba83c6ec1d6f82-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b0169350cd35566c47ba83c6ec1d6f82-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b0169350cd35566c47ba83c6ec1d6f82-Reviews.html", "metareview": "", "pdf_size": 542262, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15325280492497894623&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "ETH Zurich; EPFL; EPFL", "aff_domain": "inf.ethz.ch;epfl.ch;epfl.ch", "email": "inf.ethz.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b0169350cd35566c47ba83c6ec1d6f82-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "ETH Zurich;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.epfl.ch", "aff_unique_abbr": "ETHZ;EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Online Improper Learning with an Approximation Oracle", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11551", "id": "11551", "author_site": "Elad Hazan, Wei Hu, Yuanzhi Li, Zhiyuan Li", "author": "Elad Hazan; Wei Hu; Yuanzhi Li; Zhiyuan Li", "abstract": "We study the following question: given an efficient approximation algorithm for an optimization problem, can we learn efficiently in the same setting? We give a formal affirmative answer to this question in the form of a reduction from online learning to offline approximate optimization using an efficient algorithm that guarantees near-optimal regret. The algorithm is efficient in terms of the number of oracle calls to a given approximation oracle \u2013 it makes only logarithmically many such calls per iteration. This resolves an open question by Kalai and Vempala, and by Garber. Furthermore, our result applies to more general improper learning problems.", "bibtex": "@inproceedings{NEURIPS2018_ad47a008,\n author = {Hazan, Elad and Hu, Wei and Li, Yuanzhi and Li, Zhiyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Improper Learning with an Approximation Oracle},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ad47a008a2f806aa6eb1b53852cd8b37-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ad47a008a2f806aa6eb1b53852cd8b37-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ad47a008a2f806aa6eb1b53852cd8b37-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ad47a008a2f806aa6eb1b53852cd8b37-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ad47a008a2f806aa6eb1b53852cd8b37-Reviews.html", "metareview": "", "pdf_size": 559060, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4337651561670897981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Princeton University + Google AI Princeton; Princeton University; Stanford University; Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu;stanford.edu;cs.princeton.edu", "email": "cs.princeton.edu;cs.princeton.edu;stanford.edu;cs.princeton.edu", "github": "", "project": "https://arxiv.org/abs/1804.07837", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ad47a008a2f806aa6eb1b53852cd8b37-Abstract.html", "aff_unique_index": "0+1;0;2;0", "aff_unique_norm": "Princeton University;Google;Stanford University", "aff_unique_dep": ";Google AI;", "aff_unique_url": "https://www.princeton.edu;https://ai.google;https://www.stanford.edu", "aff_unique_abbr": "Princeton;Google AI;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Princeton;Stanford", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Learning of Quantum States", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11855", "id": "11855", "author_site": "Scott Aaronson, Xinyi Chen, Elad Hazan, Satyen Kale, Ashwin Nayak", "author": "Scott Aaronson; Xinyi Chen; Elad Hazan; Satyen Kale; Ashwin Nayak", "abstract": "Suppose we have many copies of an unknown $n$-qubit state $\\rho$. We measure some copies of $\\rho$ using a known two-outcome measurement $E_1$, then other copies using a measurement $E_2$, and so on. At each stage $t$, we generate a current hypothesis $\\omega_t$ about the state $\\rho$, using the outcomes of the previous measurements. We show that it is possible to do this in a way that guarantees that $|\\trace(E_i \\omega_t) - \\trace(E_i\\rho)|$, the error in our prediction for the next measurement, is at least $\\varepsilon$ at most $O(n/\\varepsilon^2)$ times. Even in the non-realizable setting---where there could be arbitrary noise in the measurement outcomes---we show how to output hypothesis states that incur at most $O(\\sqrt{Tn})$ excess loss over the best possible state on the first $T$ measurements. These results generalize a 2007 theorem by Aaronson on the PAC-learnability of quantum states, to the online and regret-minimization settings. We give three different ways to prove our results---using convex optimization, quantum postselection, and sequential fat-shattering dimension---which have different advantages in terms of parameters and portability.", "bibtex": "@inproceedings{NEURIPS2018_c1a3d347,\n author = {Aaronson, Scott and Chen, Xinyi and Hazan, Elad and Kale, Satyen and Nayak, Ashwin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Learning of Quantum States},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c1a3d34711ab5d85335331ca0e57f067-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c1a3d34711ab5d85335331ca0e57f067-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c1a3d34711ab5d85335331ca0e57f067-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c1a3d34711ab5d85335331ca0e57f067-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c1a3d34711ab5d85335331ca0e57f067-Reviews.html", "metareview": "", "pdf_size": 397148, "gs_citation": 110, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3927070956753032766&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "UT Austin; Google AI Princeton; Princeton University + Google AI Princeton; Google AI, New York; University of Waterloo", "aff_domain": "cs.utexas.edu;google.com;cs.princeton.edu;google.com;uwaterloo.ca", "email": "cs.utexas.edu;google.com;cs.princeton.edu;google.com;uwaterloo.ca", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c1a3d34711ab5d85335331ca0e57f067-Abstract.html", "aff_unique_index": "0;1;2+1;1;3", "aff_unique_norm": "University of Texas at Austin;Google;Princeton University;University of Waterloo", "aff_unique_dep": ";Google AI;;", "aff_unique_url": "https://www.utexas.edu;https://ai.google;https://www.princeton.edu;https://uwaterloo.ca", "aff_unique_abbr": "UT Austin;Google AI;Princeton;UW", "aff_campus_unique_index": "0;1;1;3", "aff_campus_unique": "Austin;Princeton;;New York", "aff_country_unique_index": "0;0;0+0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Online Learning with an Unknown Fairness Metric", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11268", "id": "11268", "author_site": "Stephen Gillen, Christopher Jung, Michael Kearns, Aaron Roth", "author": "Stephen Gillen; Christopher Jung; Michael Kearns; Aaron Roth", "abstract": "We consider the problem of online learning in the linear contextual bandits setting, but in which there are also strong individual fairness constraints governed by an unknown similarity metric. These constraints demand that we select similar actions or individuals with approximately equal probability [DHPRZ12], which may be at odds with optimizing reward, thus modeling settings where profit and social policy are in tension. We assume we learn about an unknown Mahalanobis similarity metric from only weak feedback that identifies fairness violations, but does not quantify their extent. This is intended to represent the interventions of a regulator who \"knows unfairness when he sees it\" but nevertheless cannot enunciate a quantitative fairness metric over individuals. Our main result is an algorithm in the adversarial context setting that has a number of fairness violations that depends only logarithmically on $T$, while obtaining an optimal $O(\\sqrt{T})$ regret bound to the best fair policy.", "bibtex": "@inproceedings{NEURIPS2018_50905d7b,\n author = {Gillen, Stephen and Jung, Christopher and Kearns, Michael and Roth, Aaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Learning with an Unknown Fairness Metric},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/50905d7b2216bfeccb5b41016357176b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/50905d7b2216bfeccb5b41016357176b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/50905d7b2216bfeccb5b41016357176b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/50905d7b2216bfeccb5b41016357176b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/50905d7b2216bfeccb5b41016357176b-Reviews.html", "metareview": "", "pdf_size": 322288, "gs_citation": 188, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10105322084567709783&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Pennsylvania; University of Pennsylvania; University of Pennsylvania; University of Pennsylvania", "aff_domain": "math.upenn.edu;cis.upenn.edu;cis.upenn.edu;cis.upenn.edu", "email": "math.upenn.edu;cis.upenn.edu;cis.upenn.edu;cis.upenn.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/50905d7b2216bfeccb5b41016357176b-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Online Reciprocal Recommendation with Theoretical Performance Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11790", "id": "11790", "author_site": "Claudio Gentile, Nikos Parotsidis, Fabio Vitale", "author": "Fabio Vitale; Nikos Parotsidis; Claudio Gentile", "abstract": "A reciprocal recommendation problem is one where the goal of learning is not just to predict a user's preference towards a passive item (e.g., a book), but to recommend to the targeted user on one side another user from the other side such that a mutual interest between the two exists. The problem thus is sharply different from the more traditional items-to-users recommendation, since a good match requires meeting the preferences of both users. We initiate a rigorous theoretical investigation of the reciprocal recommendation task in a specific framework of sequential learning. We point out general limitations, formulate reasonable assumptions enabling effective learning and, under these assumptions, we design and analyze a computationally efficient algorithm that uncovers mutual likes at a pace comparable to that achieved by a clairvoyant algorithm knowing all user preferences in advance. Finally, we validate our algorithm against synthetic and real-world datasets, showing improved empirical performance over simple baselines.", "bibtex": "@inproceedings{NEURIPS2018_97af07a1,\n author = {Vitale, Fabio and Parotsidis, Nikos and Gentile, Claudio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Reciprocal Recommendation with Theoretical Performance Guarantees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/97af07a14cacba681feacf3012730892-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/97af07a14cacba681feacf3012730892-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/97af07a14cacba681feacf3012730892-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/97af07a14cacba681feacf3012730892-Reviews.html", "metareview": "", "pdf_size": 1196292, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2038132666053353557&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Sapienza University of Rome (Italy) & University of Lille (France) & INRIA Lille Nord Europe; University of Rome Tor Vergata, Rome, Italy; INRIA Lille & Google New York", "aff_domain": "inria.fr;uniroma2.it;gmail.com", "email": "inria.fr;uniroma2.it;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/97af07a14cacba681feacf3012730892-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Sapienza University of Rome;University of Rome Tor Vergata;INRIA Lille", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uniroma1.it;https://www.uniroma2.it;https://www.inria.fr", "aff_unique_abbr": "Sapienza;UniRoma2;INRIA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Rome;Lille", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Italy;France" }, { "title": "Online Robust Policy Learning in the Presence of Unknown Adversaries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11941", "id": "11941", "author_site": "Aaron Havens, Zhanhong Jiang, Soumik Sarkar", "author": "Aaron Havens; Zhanhong Jiang; Soumik Sarkar", "abstract": "The growing prospect of deep reinforcement learning (DRL) being used in cyber-physical systems has raised concerns around safety and robustness of autonomous agents. Recent work on generating adversarial attacks has shown that it is computationally feasible for a bad actor to fool a DRL policy into behaving suboptimally. Although certain adversarial attacks with specific attack models have been addressed, most studies are only interested in off-line optimization in the data space (e.g., example fitting, distillation). This paper introduces a Meta-Learned Advantage Hierarchy (MLAH) framework that is attack model-agnostic and more suited to reinforcement learning, via handling the attacks in the decision space (as opposed to data space) and directly mitigating learned bias introduced by the adversary. In MLAH, we learn separate sub-policies (nominal and adversarial) in an online manner, as guided by a supervisory master agent that detects the presence of the adversary by leveraging the advantage function for the sub-policies. We demonstrate that the proposed algorithm enables policy learning with significantly lower bias as compared to the state-of-the-art policy learning approaches even in the presence of heavy state information attacks. 
We present algorithm analysis and simulation results using popular OpenAI Gym environments.", "bibtex": "@inproceedings{NEURIPS2018_8a36dfc6,\n author = {Havens, Aaron and Jiang, Zhanhong and Sarkar, Soumik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Robust Policy Learning in the Presence of Unknown Adversaries},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Reviews.html", "metareview": "", "pdf_size": 626855, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10678154192027752395&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Mechanical Engineering, Iowa State University; Department of Mechanical Engineering, Iowa State University; Department of Mechanical Engineering, Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu", "email": "iastate.edu;iastate.edu;iastate.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8a36dfc67ebfbbea9bd01cd8a4c8ad32-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "Department of Mechanical Engineering", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Online Structure Learning for Feed-Forward and Recurrent Sum-Product Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11669", "id": "11669", "author_site": "Agastya Kalra, Abdullah Rashwan, Wei-Shou Hsu, Pascal Poupart, Prashant Doshi, George Trimponias", "author": "Agastya Kalra; Abdullah Rashwan; Wei-Shou Hsu; Pascal Poupart; Prashant Doshi; Georgios Trimponias", "abstract": "Sum-product networks have recently emerged as an attractive representation due to their dual view as a special type of deep neural network with clear semantics and a special type of probabilistic graphical model for which inference is always tractable. Those properties follow from some conditions (i.e., completeness and decomposability) that must be respected by the structure of the network. As a result, it is not easy to specify a valid sum-product network by hand and therefore structure learning techniques are typically used in practice. This paper describes a new online structure learning technique for feed-forward and recurrent SPNs. 
The algorithm is demonstrated on real-world datasets with continuous features for which it is not clear what network architecture might be best, including sequence datasets of varying length.", "bibtex": "@inproceedings{NEURIPS2018_66121d1f,\n author = {Kalra, Agastya and Rashwan, Abdullah and Hsu, Wei-Shou and Poupart, Pascal and Doshi, Prashant and Trimponias, Georgios},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Structure Learning for Feed-Forward and Recurrent Sum-Product Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/66121d1f782d29b62a286909165517bc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/66121d1f782d29b62a286909165517bc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/66121d1f782d29b62a286909165517bc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/66121d1f782d29b62a286909165517bc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/66121d1f782d29b62a286909165517bc-Reviews.html", "metareview": "", "pdf_size": 2181204, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16611782719886586407&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Cheriton School of Computer Science, Waterloo AI Institute, University of Waterloo, Canada; Cheriton School of Computer Science, Waterloo AI Institute, University of Waterloo, Canada; Vector Institute, Toronto, Canada; Cheriton School of Computer Science, Waterloo AI Institute, University of Waterloo, Canada; Department of Computer Science, University of Georgia, USA; Huawei Noah\u2019s Ark Lab, Hong Kong", "aff_domain": "gmail.com;uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;cs.uga.edu;huawei.com", "email": "gmail.com;uwaterloo.ca;uwaterloo.ca;uwaterloo.ca;cs.uga.edu;huawei.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/66121d1f782d29b62a286909165517bc-Abstract.html", "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "University of Waterloo;Vector Institute;University of Georgia;Huawei", "aff_unique_dep": "Cheriton School of Computer Science;;Department of Computer Science;Huawei Noah\u2019s Ark Lab", "aff_unique_url": "https://uwaterloo.ca;https://vectorinstitute.ai;https://www.uga.edu;https://www.huawei.com/en/ai/noahs-ark-lab", "aff_unique_abbr": "UW;Vector Institute;UGA;Huawei Noah\u2019s Ark Lab", "aff_campus_unique_index": "0;0;1;0;3", "aff_campus_unique": "Waterloo;Toronto;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "Canada;United States;China" }, { "title": "Online Structured Laplace Approximations for Overcoming Catastrophic Forgetting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11373", "id": "11373", "author_site": "Hippolyt Ritter, Aleksandar Botev, David Barber", "author": "Hippolyt Ritter; Aleksandar Botev; David Barber", "abstract": "We introduce the Kronecker factored online Laplace approximation for overcoming catastrophic forgetting in neural networks. The method is grounded in a Bayesian online learning framework, where we recursively approximate the posterior after every task with a Gaussian, leading to a quadratic penalty on changes to the weights. 
The Laplace approximation requires calculating the Hessian around a mode, which is typically intractable for modern architectures. In order to make our method scalable, we leverage recent block-diagonal Kronecker factored approximations to the curvature. Our algorithm achieves over 90% test accuracy across a sequence of 50 instantiations of the permuted MNIST dataset, substantially outperforming related methods for overcoming catastrophic forgetting.", "bibtex": "@inproceedings{NEURIPS2018_f31b2046,\n author = {Ritter, Hippolyt and Botev, Aleksandar and Barber, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Structured Laplace Approximations for Overcoming Catastrophic Forgetting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f31b20466ae89669f9741e047487eb37-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f31b20466ae89669f9741e047487eb37-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f31b20466ae89669f9741e047487eb37-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f31b20466ae89669f9741e047487eb37-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f31b20466ae89669f9741e047487eb37-Reviews.html", "metareview": "", "pdf_size": 520576, "gs_citation": 403, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15617292257444082103&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University College London; University College London; University College London+Alan Turing Institute+reinfer.io", "aff_domain": "cs.ucl.ac.uk; ; ", "email": "cs.ucl.ac.uk; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f31b20466ae89669f9741e047487eb37-Abstract.html", "aff_unique_index": "0;0;0+1+2", "aff_unique_norm": "University College London;Alan Turing Institute;Reinfer", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.turing.ac.uk;https://www.reinfer.io", "aff_unique_abbr": "UCL;ATI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0+1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Online convex optimization for cumulative constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11595", "id": "11595", "author_site": "Jianjun Yuan, Andrew Lamperski", "author": "Jianjun Yuan; Andrew Lamperski", "abstract": "We propose algorithms for online convex\n optimization which lead to cumulative squared constraint violations\n of the form\n $\\sum\\limits_{t=1}^T\\big([g(x_t)]_+\\big)^2=O(T^{1-\\beta})$, where\n $\\beta\\in(0,1)$. Previous literature has\n focused on long-term constraints of the form\n $\\sum\\limits_{t=1}^Tg(x_t)$. There, strictly feasible solutions\n can cancel out the effects of violated constraints.\n In contrast, the new form heavily penalizes large constraint\n violations and cancellation effects cannot occur. 
\n Furthermore, useful bounds on the single step constraint violation\n $[g(x_t)]_+$ are derived.\n For convex objectives, our regret bounds generalize\n existing bounds, and for strongly convex objectives we give improved\n regret bounds.\n In numerical experiments, we show that our algorithm closely follows\n the constraint boundary, leading to low cumulative violation.", "bibtex": "@inproceedings{NEURIPS2018_9cb9ed4f,\n author = {Yuan, Jianjun and Lamperski, Andrew},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online convex optimization for cumulative constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9cb9ed4f35cf7c2f295cc2bc6f732a84-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9cb9ed4f35cf7c2f295cc2bc6f732a84-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9cb9ed4f35cf7c2f295cc2bc6f732a84-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9cb9ed4f35cf7c2f295cc2bc6f732a84-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9cb9ed4f35cf7c2f295cc2bc6f732a84-Reviews.html", "metareview": "", "pdf_size": 5822574, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17649949418167933994&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Computer Engineering, University of Minnesota; Department of Electrical and Computer Engineering, University of Minnesota", "aff_domain": "umn.edu;umn.edu", "email": "umn.edu;umn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9cb9ed4f35cf7c2f295cc2bc6f732a84-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.umn.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Algorithms for Continuous Non-monotone Submodular and DR-Submodular Maximization", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11912", "id": "11912", "author_site": "Rad Niazadeh, Tim Roughgarden, Joshua Wang", "author": "Rad Niazadeh; Tim Roughgarden; Joshua Wang", "abstract": "In this paper we study the fundamental problems of maximizing a continuous non-monotone submodular function over a hypercube, with and without coordinate-wise concavity. This family of optimization problems has several applications in machine learning, economics, and communication systems. Our main result is the first 1/2-approximation algorithm for continuous submodular function maximization; this approximation factor of 1/2 is the best possible for algorithms that use only polynomially many queries. For the special case of DR-submodular maximization, we provide a faster 1/2-approximation algorithm that runs in (almost) linear time. 
Both of these results improve upon prior work [Bian et al., 2017, Soma and Yoshida, 2017, Buchbinder et al., 2012].", "bibtex": "@inproceedings{NEURIPS2018_cdfa4c42,\n author = {Niazadeh, Rad and Roughgarden, Tim and Wang, Joshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Algorithms for Continuous Non-monotone Submodular and DR-Submodular Maximization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cdfa4c42f465a5a66871587c69fcfa34-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cdfa4c42f465a5a66871587c69fcfa34-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cdfa4c42f465a5a66871587c69fcfa34-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cdfa4c42f465a5a66871587c69fcfa34-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cdfa4c42f465a5a66871587c69fcfa34-Reviews.html", "metareview": "", "pdf_size": 1186198, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4738752712573924507&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Stanford University, Stanford, CA 95130; Stanford University, Stanford, CA 95130; Google, Mountain View, CA 94043", "aff_domain": "cs.stanford.edu;cs.stanford.edu;google.com", "email": "cs.stanford.edu;cs.stanford.edu;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cdfa4c42f465a5a66871587c69fcfa34-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Optimal Algorithms for Non-Smooth Distributed Optimization in Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11281", "id": "11281", "author_site": "Kevin Scaman, Francis Bach, Sebastien Bubeck, Laurent Massouli\u00e9, Yin Tat Lee", "author": "Kevin Scaman; Francis Bach; Sebastien Bubeck; Laurent Massouli\u00e9; Yin Tat Lee", "abstract": "In this work, we consider the distributed optimization of non-smooth convex functions using a network of computing units. We investigate this problem under two regularity assumptions: (1) the Lipschitz continuity of the global objective function, and (2) the Lipschitz continuity of local individual functions. Under the local regularity assumption, we provide the first optimal first-order decentralized algorithm called multi-step primal-dual (MSPD) and its corresponding optimal convergence rate. A notable aspect of this result is that, for non-smooth functions, while the dominant term of the error is in $O(1/\\sqrt{t})$, the structure of the communication network only impacts a second-order term in $O(1/t)$, where $t$ is time. In other words, the error due to limits in communication resources decreases at a fast rate even in the case of non-strongly-convex objective functions. 
Under the global regularity assumption, we provide a simple yet efficient algorithm called distributed randomized smoothing (DRS) based on a local smoothing of the objective function, and show that DRS is within a $d^{1/4}$ multiplicative factor of the optimal convergence rate, where $d$ is the underlying dimension.", "bibtex": "@inproceedings{NEURIPS2018_8fb21ee7,\n author = {Scaman, Kevin and Bach, Francis and Bubeck, Sebastien and Massouli\\'{e}, Laurent and Lee, Yin Tat},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Algorithms for Non-Smooth Distributed Optimization in Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8fb21ee7a2207526da55a679f0332de2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8fb21ee7a2207526da55a679f0332de2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8fb21ee7a2207526da55a679f0332de2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8fb21ee7a2207526da55a679f0332de2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8fb21ee7a2207526da55a679f0332de2-Reviews.html", "metareview": "", "pdf_size": 258566, "gs_citation": 194, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13664062490804615438&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Huawei Noah\u2019s Ark Lab; INRIA, Ecole Normale Sup\u00e9rieure, PSL Research University; Microsoft Research; University of Washington; MSR-INRIA Joint Centre", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8fb21ee7a2207526da55a679f0332de2-Abstract.html", "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Huawei;INRIA;Microsoft;University of Washington", "aff_unique_dep": "Noah\u2019s Ark Lab;;Microsoft Research;", "aff_unique_url": "https://www.huawei.com;https://www.inria.fr;https://www.microsoft.com/en-us/research;https://www.washington.edu", "aff_unique_abbr": "Huawei;INRIA;MSR;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;1", "aff_country_unique": "China;France;United States" }, { "title": "Optimal Subsampling with Influence Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11365", "id": "11365", "author_site": "Daniel Ting, Eric Brochu", "author": "Daniel Ting; Eric Brochu", "abstract": "Subsampling is a common and often effective method to deal with the computational challenges of large datasets. However, for most statistical models, there is no well-motivated approach for drawing a non-uniform subsample. We show that the concept of an asymptotically linear estimator and the associated influence function leads to asymptotically optimal sampling probabilities for a wide class of popular models. This is the only tight optimality result for subsampling we are aware of as other methods only provide probabilistic error bounds or optimal rates. 
\nFurthermore, for linear regression models, which have well-studied procedures for non-uniform subsampling, we empirically show our optimal influence function based method outperforms previous approaches even when using approximations to the optimal probabilities.", "bibtex": "@inproceedings{NEURIPS2018_57c0531e,\n author = {Ting, Daniel and Brochu, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Subsampling with Influence Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/57c0531e13f40b91b3b0f1a30b529a1d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/57c0531e13f40b91b3b0f1a30b529a1d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/57c0531e13f40b91b3b0f1a30b529a1d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/57c0531e13f40b91b3b0f1a30b529a1d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/57c0531e13f40b91b3b0f1a30b529a1d-Reviews.html", "metareview": "", "pdf_size": 659228, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4921049346602152246&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Tableau Software, Seattle, WA, USA; Tableau Software, Vancouver, BC, Canada", "aff_domain": "tableau.com;tableau.com", "email": "tableau.com;tableau.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/57c0531e13f40b91b3b0f1a30b529a1d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Tableau Software", "aff_unique_dep": "", "aff_unique_url": "https://www.tableau.com", "aff_unique_abbr": "Tableau", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Seattle;Vancouver", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "title": "Optimistic optimization of a Brownian", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11306", "id": "11306", "author_site": "Jean-Bastien Grill, Michal Valko, Remi Munos", "author": "Jean-Bastien Grill; Michal Valko; Remi Munos", "abstract": "We address the problem of optimizing a Brownian motion. We consider a (random) realization $W$ of a Brownian motion with input space in $[0,1]$. Given $W$, our goal is to return an $\\epsilon$-approximation of its maximum using the smallest possible number of function evaluations, the sample complexity of the algorithm. We provide an algorithm with sample complexity of order $\\log^2(1/\\epsilon)$. This improves over previous results of Al-Mharmah and Calvin (1996) and Calvin et al. (2017) which provided only polynomial rates. Our algorithm is adaptive---each query depends on previous values---and is an instance of the optimism-in-the-face-of-uncertainty principle.", "bibtex": "@inproceedings{NEURIPS2018_b132ecc1,\n author = {Grill, Jean-Bastien and Valko, Michal and Munos, Remi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimistic optimization of a Brownian},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b132ecc1609bfcf302615847c1caa69a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b132ecc1609bfcf302615847c1caa69a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b132ecc1609bfcf302615847c1caa69a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b132ecc1609bfcf302615847c1caa69a-Reviews.html", "metareview": "", "pdf_size": 1322903, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4445920156156523054&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "SequeL team, INRIA Lille - Nord Europe, France + DeepMind Paris, France; SequeL team, INRIA Lille - Nord Europe, France + DeepMind Paris, France; SequeL team, INRIA Lille - Nord Europe, France + DeepMind Paris, France", "aff_domain": "google.com;inria.fr;google.com", "email": "google.com;inria.fr;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b132ecc1609bfcf302615847c1caa69a-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "INRIA Lille - Nord Europe;DeepMind", "aff_unique_dep": "SequeL team;", "aff_unique_url": "https://www.inria.fr/en/centre/lille-nord-europe;https://deepmind.com", "aff_unique_abbr": "INRIA;DeepMind", "aff_campus_unique_index": "0+1;0+1;0+1", "aff_campus_unique": "Lille;Paris", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "France" }, { "title": "Optimization for Approximate Submodularity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11064", "id": "11064", "author_site": "Yaron Singer, Avinatan Hassidim", "author": "Yaron Singer; Avinatan Hassidim", "abstract": "We consider the problem of maximizing a submodular function when given access to its approximate version. Submodular functions are heavily studied in a wide variety of disciplines, since they are used to model many real-world phenomena, and are amenable to optimization. However, there are many cases in which the phenomena we observe are only approximately submodular and the approximation guarantees cease to hold. We describe a technique which we call the sampled\nmean approximation that yields strong guarantees for maximization of submodular functions from approximate surrogates under cardinality and intersection of matroid constraints. In particular, we show tight guarantees for maximization under a cardinality constraint and a $1/(1+P)$ approximation\nunder the intersection of $P$ matroids.", "bibtex": "@inproceedings{NEURIPS2018_cfa0860e,\n author = {Singer, Yaron and Hassidim, Avinatan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimization for Approximate Submodularity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cfa0860e83a4c3a763a7e62d825349f7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cfa0860e83a4c3a763a7e62d825349f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cfa0860e83a4c3a763a7e62d825349f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cfa0860e83a4c3a763a7e62d825349f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cfa0860e83a4c3a763a7e62d825349f7-Reviews.html", "metareview": "", "pdf_size": 564783, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15719285506090897274&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 0, "aff": "Bar Ilan University and Google; Harvard University", "aff_domain": "cs.biu.ac.il;seas.harvard.edu", "email": "cs.biu.ac.il;seas.harvard.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cfa0860e83a4c3a763a7e62d825349f7-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Bar-Ilan University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.biu.ac.il;https://www.harvard.edu", "aff_unique_abbr": "BIU;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "title": "Optimization of Smooth Functions with Noisy Observations: Local Minimax Rates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11429", "id": "11429", "author_site": "Yining Wang, Sivaraman Balakrishnan, Aarti Singh", "author": "Yining Wang; Sivaraman Balakrishnan; Aarti Singh", "abstract": "We consider the problem of global optimization of an unknown non-convex smooth function with noisy zeroth-order feedback. We propose a local minimax framework to study the fundamental difficulty of optimizing smooth functions with adaptive function evaluations. We show that for functions with fast growth around their global minima, carefully designed optimization algorithms can identify a near global minimizer with many fewer queries than worst-case global minimax theory predicts. For the special case of strongly convex and smooth functions, our implied convergence rates match the ones developed for zeroth-order convex optimization problems. On the other hand, we show that in the worst case no algorithm can converge faster than the minimax rate of estimating an unknown function in $\\ell_\\infty$-norm. Finally, we show that non-adaptive algorithms, although optimal in a global minimax sense, do not attain the optimal local minimax rate.", "bibtex": "@inproceedings{NEURIPS2018_4ba3c163,\n author = {Wang, Yining and Balakrishnan, Sivaraman and Singh, Aarti},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimization of Smooth Functions with Noisy Observations: Local Minimax Rates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4ba3c163cd1efd4c14e3a415fa0a3010-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4ba3c163cd1efd4c14e3a415fa0a3010-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4ba3c163cd1efd4c14e3a415fa0a3010-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4ba3c163cd1efd4c14e3a415fa0a3010-Reviews.html", "metareview": "", "pdf_size": 550354, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9634622348757501879&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Machine Learning and Statistics, Carnegie Mellon University, Pittsburgh, PA, 15213, USA; Department of Machine Learning and Statistics, Carnegie Mellon University, Pittsburgh, PA, 15213, USA; Department of Machine Learning and Statistics, Carnegie Mellon University, Pittsburgh, PA, 15213, USA", "aff_domain": "cs.cmu.edu;stat.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;stat.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4ba3c163cd1efd4c14e3a415fa0a3010-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Department of Machine Learning and Statistics", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Optimization over Continuous and Multi-dimensional Decisions with Observational Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11302", "id": "11302", "author_site": "Dimitris Bertsimas, Christopher McCord", "author": "Dimitris Bertsimas; Christopher McCord", "abstract": "We consider the optimization of an uncertain objective over continuous and multi-dimensional decision spaces in problems in which we are only provided with observational data. We propose a novel algorithmic framework that is tractable, asymptotically consistent, and superior to comparable methods on example problems. Our approach leverages predictive machine learning methods and incorporates information on the uncertainty of the predicted outcomes for the purpose of prescribing decisions. We demonstrate the efficacy of our method on examples involving both synthetic and real data sets.", "bibtex": "@inproceedings{NEURIPS2018_f337d999,\n author = {Bertsimas, Dimitris and McCord, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimization over Continuous and Multi-dimensional Decisions with Observational Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f337d999d9ad116a7b4f3d409fcc6480-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f337d999d9ad116a7b4f3d409fcc6480-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f337d999d9ad116a7b4f3d409fcc6480-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f337d999d9ad116a7b4f3d409fcc6480-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f337d999d9ad116a7b4f3d409fcc6480-Reviews.html", "metareview": "", "pdf_size": 357137, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6963843439478717138&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Sloan School of Management, Massachusetts Institute of Technology, Cambridge, MA 02142; Operations Research Center, Massachusetts Institute of Technology, Cambridge, MA 02142", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f337d999d9ad116a7b4f3d409fcc6480-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Sloan School of Management", "aff_unique_url": "https://mitsloan.mit.edu/", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Orthogonally Decoupled Variational Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11832", "id": "11832", "author_site": "Hugh Salimbeni, Ching-An Cheng, Byron Boots, Marc Deisenroth", "author": "Hugh Salimbeni; Ching-An Cheng; Byron Boots; Marc Deisenroth", "abstract": "Gaussian processes (GPs) provide a powerful non-parametric framework for reasoning over functions. Despite appealing theory, its superlinear computational and memory complexities have presented a long-standing challenge. State-of-the-art sparse variational inference methods trade modeling accuracy against complexity. However, the complexities of these methods still scale superlinearly in the number of basis functions, implying that sparse GP methods are able to learn from large datasets only when a small model is used. Recently, a decoupled approach was proposed that removes the unnecessary coupling between the complexities of modeling the mean and the covariance functions of a GP. It achieves a linear complexity in the number of mean parameters, so an expressive posterior mean function can be modeled. While promising, this approach suffers from optimization difficulties due to ill-conditioning and non-convexity. In this work, we propose an alternative decoupled parametrization. It adopts an orthogonal basis in the mean function to model the residues that cannot be learned by the standard coupled approach. Therefore, our method extends, rather than replaces, the coupled approach to achieve strictly better performance. This construction admits a straightforward natural gradient update rule, so the structure of the information manifold that is lost during decoupling can be leveraged to speed up learning. 
Empirically, our algorithm demonstrates significantly faster convergence in multiple experiments.", "bibtex": "@inproceedings{NEURIPS2018_cc638784,\n author = {Salimbeni, Hugh and Cheng, Ching-An and Boots, Byron and Deisenroth, Marc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Orthogonally Decoupled Variational Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cc638784cf213986ec75983a4aa08cdb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cc638784cf213986ec75983a4aa08cdb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cc638784cf213986ec75983a4aa08cdb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cc638784cf213986ec75983a4aa08cdb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cc638784cf213986ec75983a4aa08cdb-Reviews.html", "metareview": "", "pdf_size": 431670, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13926573353559028690&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "aff": "Imperial College London; Georgia Institute of Technology; Georgia Institute of Technology; Imperial College London", "aff_domain": "ic.ac.uk;gatech.edu;gatech.edu;ic.ac.uk", "email": "ic.ac.uk;gatech.edu;gatech.edu;ic.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cc638784cf213986ec75983a4aa08cdb-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Imperial College London;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.gatech.edu", "aff_unique_abbr": "ICL;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Out of the Box: Reasoning with Graph Convolution Nets for Factual Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11273", "id": "11273", "author_site": "Medhini Narasimhan, Svetlana Lazebnik, Alex Schwing", "author": "Medhini Narasimhan; Svetlana Lazebnik; Alexander Schwing", "abstract": "Accurately answering a question about a given image requires combining observations with general knowledge. While this is effortless for humans, reasoning with general knowledge remains an algorithmic challenge. To advance research in this direction a novel", "bibtex": "@inproceedings{NEURIPS2018_c26820b8,\n author = {Narasimhan, Medhini and Lazebnik, Svetlana and Schwing, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Out of the Box: Reasoning with Graph Convolution Nets for Factual Visual Question Answering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c26820b8a4c1b3c2aa868d6d57e14a79-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c26820b8a4c1b3c2aa868d6d57e14a79-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c26820b8a4c1b3c2aa868d6d57e14a79-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c26820b8a4c1b3c2aa868d6d57e14a79-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c26820b8a4c1b3c2aa868d6d57e14a79-Reviews.html", "metareview": "", "pdf_size": 2565236, "gs_citation": 294, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3658580239219021626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Illinois Urbana-Champaign; University of Illinois Urbana-Champaign; University of Illinois Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c26820b8a4c1b3c2aa868d6d57e14a79-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Out-of-Distribution Detection using Multiple Semantic Label Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11710", "id": "11710", "author_site": "Gabi Shalev, Yossi Adi, Joseph Keshet", "author": "Gabi Shalev; Yossi Adi; Joseph Keshet", "abstract": "Deep Neural Networks are powerful models that have attained remarkable results on a variety of tasks. These models are shown to be extremely efficient when training and test data are drawn from the same distribution. However, it is not clear how a network will act when it is fed an out-of-distribution example. In this work, we consider the problem of out-of-distribution detection in neural networks. We propose to use multiple semantic dense representations instead of a sparse representation as the target label. Specifically, we propose to use several word representations obtained from different corpora or architectures as target labels. We evaluated the proposed model on computer vision and speech commands detection tasks and compared it to previous methods. Results suggest that our method compares favorably with previous work. In addition, we demonstrate the efficiency of our approach in detecting wrongly classified and adversarial examples.", "bibtex": "@inproceedings{NEURIPS2018_2151b4c7,\n author = {Shalev, Gabi and Adi, Yossi and Keshet, Joseph},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Out-of-Distribution Detection using Multiple Semantic Label Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2151b4c76b4dcb048d06a5c32942b6f6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2151b4c76b4dcb048d06a5c32942b6f6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2151b4c76b4dcb048d06a5c32942b6f6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2151b4c76b4dcb048d06a5c32942b6f6-Reviews.html", "metareview": "", "pdf_size": 1883142, "gs_citation": 104, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5945973689430237155&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Bar-Ilan University, Israel; Bar-Ilan University, Israel; Bar-Ilan University, Israel", "aff_domain": "gmail.com;gmail.com;cs.biu.ac.il", "email": "gmail.com;gmail.com;cs.biu.ac.il", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2151b4c76b4dcb048d06a5c32942b6f6-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Overcoming Language Priors in Visual Question Answering with Adversarial Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11169", "id": "11169", "author_site": "Sainandan Ramakrishnan, Aishwarya Agrawal, Stefan Lee", "author": "Sainandan Ramakrishnan; Aishwarya Agrawal; Stefan Lee", "abstract": "Modern Visual Question Answering (VQA) models have been shown to rely heavily on superficial correlations between question and answer words learned during training -- \\eg overwhelmingly reporting the type of room as kitchen or the sport being played as tennis, irrespective of the image. Most alarmingly, this shortcoming is often not well reflected during evaluation because the same strong priors exist in test distributions; however, a VQA system that fails to ground questions in image content would likely perform poorly in real-world settings.", "bibtex": "@inproceedings{NEURIPS2018_67d96d45,\n author = {Ramakrishnan, Sainandan and Agrawal, Aishwarya and Lee, Stefan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Overcoming Language Priors in Visual Question Answering with Adversarial Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/67d96d458abdef21792e6d8e590244e7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/67d96d458abdef21792e6d8e590244e7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/67d96d458abdef21792e6d8e590244e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/67d96d458abdef21792e6d8e590244e7-Reviews.html", "metareview": "", "pdf_size": 1574206, "gs_citation": 289, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7898092754875329657&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Georgia Institute of Technology; Georgia Institute of Technology; Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu", "email": "gatech.edu;gatech.edu;gatech.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/67d96d458abdef21792e6d8e590244e7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Overfitting or perfect fitting? Risk bounds for classification and regression rules that interpolate", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11240", "id": "11240", "author_site": "Mikhail Belkin, Daniel Hsu, Partha P Mitra", "author": "Mikhail Belkin; Daniel J. Hsu; Partha Mitra", "abstract": "Many modern machine learning models are trained to achieve zero or near-zero training error in order to obtain near-optimal (but non-zero) test error. This phenomenon of strong generalization performance for ``overfitted'' / interpolated classifiers appears to be ubiquitous in high-dimensional data, having been observed in deep networks, kernel machines, boosting and random forests. Their performance is consistently robust even when the data contain large amounts of label noise. \n\nVery little theory is available to explain these observations. The vast majority of theoretical analyses of generalization allow for interpolation only when there is little or no label noise. This paper takes a step toward a theoretical foundation for interpolated classifiers by analyzing local interpolating schemes, including a geometric simplicial interpolation algorithm and singularly weighted $k$-nearest neighbor schemes. Consistency or near-consistency is proved for these schemes in classification and regression problems. Moreover, the nearest neighbor schemes exhibit optimal rates under some standard statistical assumptions.\n\nFinally, this paper suggests a way to explain the phenomenon of adversarial examples, which are seemingly ubiquitous in modern machine learning, and also discusses some connections to kernel machines and random forests in the interpolated regime.", "bibtex": "@inproceedings{NEURIPS2018_e2231217,\n author = {Belkin, Mikhail and Hsu, Daniel J and Mitra, Partha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Overfitting or perfect fitting? Risk bounds for classification and regression rules that interpolate},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e22312179bf43e61576081a2f250f845-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e22312179bf43e61576081a2f250f845-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e22312179bf43e61576081a2f250f845-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e22312179bf43e61576081a2f250f845-Reviews.html", "metareview": "", "pdf_size": 666212, "gs_citation": 359, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16748710437613247237&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "The Ohio State University; Columbia University; Cold Spring Harbor Laboratory", "aff_domain": "cse.ohio-state.edu;cs.columbia.edu;cshl.edu", "email": "cse.ohio-state.edu;cs.columbia.edu;cshl.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e22312179bf43e61576081a2f250f845-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Ohio State University;Columbia University;Cold Spring Harbor Laboratory", "aff_unique_dep": ";;", "aff_unique_url": "https://www.osu.edu;https://www.columbia.edu;https://www.cshl.edu", "aff_unique_abbr": "OSU;Columbia;CSHL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Overlapping Clustering Models, and One (class) SVM to Bind Them All", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11224", "id": "11224", "author_site": "Xueyu Mao, Purnamrita Sarkar, Deepayan Chakrabarti", "author": "Xueyu Mao; Purnamrita Sarkar; Deepayan Chakrabarti", "abstract": "People belong to multiple communities, words belong to multiple topics, and books cover multiple genres; overlapping clusters are commonplace. Many existing overlapping clustering methods model each person (or word, or book) as a non-negative weighted combination of \"exemplars\" who belong solely to one community, with some small noise. Geometrically, each person is a point on a cone whose corners are these exemplars. This basic form encompasses the widely used Mixed Membership Stochastic Blockmodel of networks and its degree-corrected variants, as well as topic models such as LDA. We show that a simple one-class SVM yields provably consistent parameter inference for all such models, and scales to large datasets. Experimental results on several simulated and real datasets show our algorithm (called SVM-cone) is both accurate and scalable.", "bibtex": "@inproceedings{NEURIPS2018_731c83db,\n author = {Mao, Xueyu and Sarkar, Purnamrita and Chakrabarti, Deepayan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Overlapping Clustering Models, and One (class) SVM to Bind Them All},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/731c83db8d2ff01bdc000083fd3c3740-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/731c83db8d2ff01bdc000083fd3c3740-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/731c83db8d2ff01bdc000083fd3c3740-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/731c83db8d2ff01bdc000083fd3c3740-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/731c83db8d2ff01bdc000083fd3c3740-Reviews.html", "metareview": "", "pdf_size": 899946, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4862340826936990460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "The University of Texas at Austin; The University of Texas at Austin; The University of Texas at Austin", "aff_domain": "cs.utexas.edu;austin.utexas.edu;utexas.edu", "email": "cs.utexas.edu;austin.utexas.edu;utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/731c83db8d2ff01bdc000083fd3c3740-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PAC-Bayes Tree: weighted subtrees with guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11902", "id": "11902", "author_site": "Tin Nguyen, Samory Kpotufe", "author": "Tin D Nguyen; Samory Kpotufe", "abstract": "We present a weighted-majority classification approach over subtrees of a fixed tree, which provably achieves excess-risk of the same order as the best tree-pruning. Furthermore, the computational efficiency of pruning is maintained at both training and testing time despite having to aggregate over an exponential number of subtrees. We believe this is the first subtree aggregation approach with such guarantees.", "bibtex": "@inproceedings{NEURIPS2018_1819020b,\n author = {Nguyen, Tin D and Kpotufe, Samory},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PAC-Bayes Tree: weighted subtrees with guarantees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1819020b02e926785cf3be594d957696-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1819020b02e926785cf3be594d957696-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1819020b02e926785cf3be594d957696-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1819020b02e926785cf3be594d957696-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1819020b02e926785cf3be594d957696-Reviews.html", "metareview": "", "pdf_size": 403506, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:2nH0agxYgzMJ:scholar.google.com/&scioq=PAC-Bayes+Tree:+weighted+subtrees+with+guarantees&hl=en&as_sdt=0,5", "gs_version_total": 6, "aff": "MIT EECS + Princeton University ORFE; Princeton University ORFE", "aff_domain": "mit.edu;princeton.edu", "email": "mit.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1819020b02e926785cf3be594d957696-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Princeton University", "aff_unique_dep": "Electrical Engineering & Computer Science;Operations Research and Financial Engineering", "aff_unique_url": "https://web.mit.edu;https://www.princeton.edu", "aff_unique_abbr": "MIT;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "PAC-Bayes bounds for stable algorithms with instance-dependent priors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11878", "id": "11878", "author_site": "Omar Rivasplata, Emilio Parrado-Hernandez, John Shawe-Taylor, Shiliang Sun, Csaba Szepesvari", "author": "Omar Rivasplata; Emilio Parrado-Hernandez; John S Shawe-Taylor; Shiliang Sun; Csaba Szepesvari", "abstract": "PAC-Bayes bounds have been proposed to get risk estimates based on a training sample. In this paper the PAC-Bayes approach is combined with stability of the hypothesis learned by a Hilbert space valued algorithm. The PAC-Bayes setting is used with a Gaussian prior centered at the expected output. Thus a novelty of our paper is using priors defined in terms of the data-generating distribution. Our main result estimates the risk of the randomized algorithm in terms of the hypothesis stability coefficients. We also provide a new bound for the SVM classifier, which is compared to other known bounds experimentally. Ours appears to be the first uniform hypothesis stability-based bound that evaluates to non-trivial values.", "bibtex": "@inproceedings{NEURIPS2018_38685413,\n author = {Rivasplata, Omar and Parrado-Hernandez, Emilio and Shawe-Taylor, John S and Sun, Shiliang and Szepesvari, Csaba},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PAC-Bayes bounds for stable algorithms with instance-dependent priors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/386854131f58a556343e056f03626e00-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/386854131f58a556343e056f03626e00-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/386854131f58a556343e056f03626e00-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/386854131f58a556343e056f03626e00-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/386854131f58a556343e056f03626e00-Reviews.html", "metareview": "", "pdf_size": 685093, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18071313249798615425&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "UCL; University Carlos III of Madrid; UCL; East China Normal University; DeepMind", "aff_domain": "ucl.ac.uk;uc3m.es;cs.ucl.ac.uk;cs.ecnu.edu.cn;deepmind.com", "email": "ucl.ac.uk;uc3m.es;cs.ucl.ac.uk;cs.ecnu.edu.cn;deepmind.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/386854131f58a556343e056f03626e00-Abstract.html", "aff_unique_index": "0;1;0;2;3", "aff_unique_norm": "University College London;Carlos III University of Madrid;East China Normal University;DeepMind", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.uc3m.es;http://www.ecnu.edu.cn;https://deepmind.com", "aff_unique_abbr": "UCL;UC3M;ECNU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0", "aff_country_unique": "United Kingdom;Spain;China" }, { "title": "PAC-learning in the presence of adversaries", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11049", "id": "11049", "author_site": "Daniel Cullina, Arjun Nitin Bhagoji, Prateek Mittal", "author": "Daniel Cullina; Arjun Nitin Bhagoji; Prateek Mittal", "abstract": "The existence of evasion attacks during the test phase of machine learning algorithms represents a significant challenge to both their deployment and understanding. These attacks can be carried out by adding imperceptible perturbations to inputs to generate adversarial examples, and finding effective defenses and detectors has proven to be difficult. In this paper, we step away from the attack-defense arms race and seek to understand the limits of what can be learned in the presence of an evasion adversary. In particular, we extend the Probably Approximately Correct (PAC)-learning framework to account for the presence of an adversary. We first define corrupted hypothesis classes which arise from standard binary hypothesis classes in the presence of an evasion adversary and derive the Vapnik-Chervonenkis (VC)-dimension for these, denoted as the adversarial VC-dimension. We then show that sample complexity upper bounds from the Fundamental Theorem of Statistical Learning can be extended to the case of evasion adversaries, where the sample complexity is controlled by the adversarial VC-dimension. We then explicitly derive the adversarial VC-dimension for halfspace classifiers in the presence of a sample-wise norm-constrained adversary of the type commonly studied for evasion attacks and show that it is the same as the standard VC-dimension, closing an open question. 
Finally, we prove that the adversarial VC-dimension can be either larger or smaller than the standard VC-dimension depending on the hypothesis class and adversary, making it an interesting object of study in its own right.", "bibtex": "@inproceedings{NEURIPS2018_8f855179,\n author = {Cullina, Daniel and Bhagoji, Arjun Nitin and Mittal, Prateek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PAC-learning in the presence of adversaries},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8f85517967795eeef66c225f7883bdcb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8f85517967795eeef66c225f7883bdcb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8f85517967795eeef66c225f7883bdcb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8f85517967795eeef66c225f7883bdcb-Reviews.html", "metareview": "", "pdf_size": 338486, "gs_citation": 167, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6369457949559522444&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Princeton University; Princeton University; Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8f85517967795eeef66c225f7883bdcb-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PCA of high dimensional random walks with comparison to neural network training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11975", "id": "11975", "author_site": "Joseph Antognini, Jascha Sohl-Dickstein", "author": "Joseph Antognini; Jascha Sohl-Dickstein", "abstract": "One technique to visualize the training of neural networks is to perform PCA on the parameters over the course of training and to project to the subspace spanned by the first few PCA components. In this paper we compare this technique to the PCA of a high dimensional random walk. We compute the eigenvalues and eigenvectors of the covariance of the trajectory and prove that in the long trajectory and high dimensional limit most of the variance is in the first few PCA components, and that the projection of the trajectory onto any subspace spanned by PCA components is a Lissajous curve. We generalize these results to a random walk with momentum and to an Ornstein-Uhlenbeck process (i.e., a random walk in a quadratic potential) and show that in high dimensions the walk is not mean reverting, but will instead be trapped at a fixed distance from the minimum. We finally analyze PCA projected training trajectories for: a linear model trained on CIFAR-10; a fully connected model trained on MNIST; and ResNet-50-v2 trained on Imagenet. 
In all cases, both the distribution of PCA eigenvalues and the projected trajectories resemble those of a random walk with drift.", "bibtex": "@inproceedings{NEURIPS2018_7a576629,\n author = {Antognini, Joseph and Sohl-Dickstein, Jascha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PCA of high dimensional random walks with comparison to neural network training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7a576629fef88f3e636afd33b09e8289-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7a576629fef88f3e636afd33b09e8289-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7a576629fef88f3e636afd33b09e8289-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7a576629fef88f3e636afd33b09e8289-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7a576629fef88f3e636afd33b09e8289-Reviews.html", "metareview": "", "pdf_size": 313777, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15275854030742859849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Whisper AI; Google Brain", "aff_domain": "gmail.com;google.com", "email": "gmail.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7a576629fef88f3e636afd33b09e8289-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Whisper AI;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.whisper.ai;https://brain.google.com", "aff_unique_abbr": "Whisper AI;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "PG-TS: Improved Thompson Sampling for Logistic Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11455", "id": "11455", "author_site": "Bianca Dumitrascu, Karen Feng, Barbara Engelhardt", "author": "Bianca Dumitrascu; Karen Feng; Barbara Engelhardt", "abstract": "We address the problem of regret minimization in logistic contextual bandits, where a learner decides among sequential actions or arms given their respective contexts to maximize binary rewards. Using a fast inference procedure with Polya-Gamma distributed augmentation variables, we propose an improved version of Thompson Sampling, a Bayesian formulation of contextual bandits with near-optimal performance. Our approach, Polya-Gamma augmented Thompson Sampling (PG-TS), achieves state-of-the-art performance on simulated and real data. PG-TS explores the action space efficiently and exploits high-reward arms, quickly converging to solutions of low regret. Its explicit estimation of the posterior distribution of the context feature covariance leads to substantial empirical gains over approximate approaches. PG-TS is the first approach to demonstrate the benefits of Polya-Gamma augmentation in bandits and to propose an efficient Gibbs sampler for approximating the analytically unsolvable integral of logistic contextual bandits.", "bibtex": "@inproceedings{NEURIPS2018_ce6c9230,\n author = {Dumitrascu, Bianca and Feng, Karen and Engelhardt, Barbara},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PG-TS: Improved Thompson Sampling for Logistic Contextual Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ce6c92303f38d297e263c7180f03d402-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ce6c92303f38d297e263c7180f03d402-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ce6c92303f38d297e263c7180f03d402-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ce6c92303f38d297e263c7180f03d402-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ce6c92303f38d297e263c7180f03d402-Reviews.html", "metareview": "", "pdf_size": 1945843, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16528819357029245171&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Lewis Sigler Institute for Integrative Genomics, Princeton University; Department of Computer Science, Princeton University; Department of Computer Science, Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ce6c92303f38d297e263c7180f03d402-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Lewis Sigler Institute for Integrative Genomics", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Princeton;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PacGAN: The power of two samples in generative adversarial networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11165", "id": "11165", "author_site": "Zinan Lin, Ashish Khetan, Giulia Fanti, Sewoong Oh", "author": "Zinan Lin; Ashish Khetan; Giulia Fanti; Sewoong Oh", "abstract": "Generative adversarial networks (GANs) are a technique for learning generative models of complex data distributions from samples. Despite remarkable advances in generating realistic images, a major shortcoming of GANs is the fact that they tend to produce samples with little diversity, even when trained on diverse datasets. This phenomenon, known as mode collapse, has been the focus of much recent work. We study a principled approach to handling mode collapse, which we call packing. The main idea is to modify the discriminator to make decisions based on multiple samples from the same class, either real or artificially generated. We draw analysis tools from binary hypothesis testing---in particular the seminal result of Blackwell---to prove a fundamental connection between packing and mode collapse. We show that packing naturally penalizes generators with mode collapse, thereby favoring generator distributions with less mode collapse during the training process. Numerical experiments on benchmark datasets suggest that packing provides significant improvements.", "bibtex": "@inproceedings{NEURIPS2018_288cc0ff,\n author = {Lin, Zinan and Khetan, Ashish and Fanti, Giulia and Oh, Sewoong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PacGAN: The power of two samples in generative adversarial networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/288cc0ff022877bd3df94bc9360b9c5d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/288cc0ff022877bd3df94bc9360b9c5d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/288cc0ff022877bd3df94bc9360b9c5d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/288cc0ff022877bd3df94bc9360b9c5d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/288cc0ff022877bd3df94bc9360b9c5d-Reviews.html", "metareview": "", "pdf_size": 467903, "gs_citation": 461, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14705983068913748289&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "ECE Department, Carnegie Mellon University; IESE Department, University of Illinois at Urbana-Champaign; ECE Department, Carnegie Mellon University; IESE Department, University of Illinois at Urbana-Champaign", "aff_domain": "andrew.cmu.edu;gmail.com;andrew.cmu.edu;illinois.edu", "email": "andrew.cmu.edu;gmail.com;andrew.cmu.edu;illinois.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/288cc0ff022877bd3df94bc9360b9c5d-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;University of Illinois Urbana-Champaign", "aff_unique_dep": "ECE Department;IESE Department", "aff_unique_url": "https://www.cmu.edu;https://www.illinois.edu", "aff_unique_abbr": "CMU;UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Parameters as interacting particles: long time convergence and asymptotic error scaling of neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11688", "id": "11688", "author_site": "Grant Rotskoff, Eric Vanden-Eijnden", "author": "Grant Rotskoff; Eric Vanden-Eijnden", "abstract": "The performance of neural networks on high-dimensional data distributions suggests that it may be possible to parameterize a representation of a given high-dimensional function with controllably small errors, potentially outperforming standard interpolation methods. We demonstrate, both theoretically and numerically, that this is indeed the case. We map the parameters of a neural network to a system of particles relaxing with an interaction potential determined by the loss function. We show that in the limit that the number of parameters $n$ is large, the landscape of the mean-squared error becomes convex and the representation error in the function scales as $O(n^{-1})$. In this limit, we prove a dynamical variant of the universal approximation theorem showing that the optimal representation can be attained by stochastic gradient descent, the algorithm ubiquitously used for parameter optimization in machine learning. In the asymptotic regime, we study the fluctuations around the optimal representation and show that they arise at a scale $O(n^{-1})$. These fluctuations in the landscape identify the natural scale for the noise in stochastic gradient descent. 
Our results apply to both single and multi-layer neural networks, as well as standard kernel methods like radial basis functions.", "bibtex": "@inproceedings{NEURIPS2018_196f5641,\n author = {Rotskoff, Grant and Vanden-Eijnden, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Parameters as interacting particles: long time convergence and asymptotic error scaling of neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/196f5641aa9dc87067da4ff90fd81e7b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/196f5641aa9dc87067da4ff90fd81e7b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/196f5641aa9dc87067da4ff90fd81e7b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/196f5641aa9dc87067da4ff90fd81e7b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/196f5641aa9dc87067da4ff90fd81e7b-Reviews.html", "metareview": "", "pdf_size": 473332, "gs_citation": 164, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17805150742517037022&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Courant Institute of Mathematical Sciences, New York University; Courant Institute of Mathematical Sciences, New York University", "aff_domain": "cims.nyu.edu;cims.nyu.edu", "email": "cims.nyu.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/196f5641aa9dc87067da4ff90fd81e7b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "Courant Institute of Mathematical Sciences", "aff_unique_url": "https://www.courant.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Paraphrasing Complex Network: Network Compression via Factor Transfer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11283", "id": "11283", "author_site": "Jangho Kim, Seonguk Park, Nojun Kwak", "author": "Jangho Kim; Seonguk Park; Nojun Kwak", "abstract": "Many researchers have sought ways of model compression to reduce the size of a deep neural network (DNN) with minimal performance degradation in order to use DNNs in embedded systems. Among model compression methods, knowledge transfer trains a student network with a stronger teacher network. In this paper, we propose a novel knowledge transfer method which uses convolutional operations to paraphrase the teacher's knowledge and to translate it for the student. This is done by two convolutional modules, which are called a paraphraser and a translator. The paraphraser is trained in an unsupervised manner to extract the teacher factors, which are defined as paraphrased information of the teacher network. The translator located at the student network extracts the student factors and helps to translate the teacher factors by mimicking them. 
We observed that our student network trained with the proposed factor transfer method outperforms the ones trained with conventional knowledge transfer methods.", "bibtex": "@inproceedings{NEURIPS2018_6d9cb7de,\n author = {Kim, Jangho and Park, Seonguk and Kwak, Nojun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Paraphrasing Complex Network: Network Compression via Factor Transfer},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6d9cb7de5e8ac30bd5e8734bc96a35c1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6d9cb7de5e8ac30bd5e8734bc96a35c1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6d9cb7de5e8ac30bd5e8734bc96a35c1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6d9cb7de5e8ac30bd5e8734bc96a35c1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6d9cb7de5e8ac30bd5e8734bc96a35c1-Reviews.html", "metareview": "", "pdf_size": 468236, "gs_citation": 688, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2520473274058783123&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Seoul National University; Seoul National University; Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "email": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6d9cb7de5e8ac30bd5e8734bc96a35c1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "Parsimonious Bayesian deep networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11323", "id": "11323", "author": "Mingyuan Zhou", "abstract": "Combining Bayesian nonparametrics and a forward model selection strategy, we construct parsimonious Bayesian deep networks (PBDNs) that infer capacity-regularized network architectures from the data and require neither cross-validation nor fine-tuning when training the model. One of the two essential components of a PBDN is the development of a special infinite-wide single-hidden-layer neural network, whose number of active hidden units can be inferred from the data. The other one is the construction of a greedy layer-wise learning algorithm that uses a forward model selection criterion to determine when to stop adding another hidden layer. We develop both Gibbs sampling and stochastic gradient descent based maximum a posteriori inference for PBDNs, providing state-of-the-art classification accuracy and interpretable data subtypes near the decision boundaries, while maintaining low computational complexity for out-of-sample prediction.", "bibtex": "@inproceedings{NEURIPS2018_efb76cff,\n author = {Zhou, Mingyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Parsimonious Bayesian deep networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/efb76cff97aaf057654ef2f38cd77d73-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/efb76cff97aaf057654ef2f38cd77d73-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/efb76cff97aaf057654ef2f38cd77d73-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/efb76cff97aaf057654ef2f38cd77d73-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/efb76cff97aaf057654ef2f38cd77d73-Reviews.html", "metareview": "", "pdf_size": 1292025, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14376157659087127451&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of IROM, McCombs School of Business, The University of Texas at Austin, Austin, TX 78712", "aff_domain": "mccombs.utexas.edu", "email": "mccombs.utexas.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/efb76cff97aaf057654ef2f38cd77d73-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "Department of IROM, McCombs School of Business", "aff_unique_url": "https://www.mccombs.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Parsimonious Quantile Regression of Financial Asset Tail Dynamics via Sequential Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11172", "id": "11172", "author_site": "Xing Yan, Weizhong Zhang, Lin Ma, Wei Liu, Qi Wu", "author": "Xing Yan; Weizhong Zhang; Lin Ma; Wei Liu; Qi Wu", "abstract": "We propose a parsimonious quantile regression framework to learn the dynamic tail behaviors of financial asset returns. Our model captures well both the time-varying characteristic and the asymmetrical heavy-tail property of financial time series. It combines the merits of a popular sequential neural network model, i.e., LSTM, with a novel parametric quantile function that we construct to represent the conditional distribution of asset returns. Our model also captures the serial dependences of higher moments individually, rather than just the volatility. Across a wide range of asset classes, the out-of-sample forecasts of conditional quantiles or VaR of our model outperform the GARCH family. Further, the proposed approach does not suffer from the issue of quantile crossing, nor is it exposed to the ill-posedness of the parametric probability density function approach.", "bibtex": "@inproceedings{NEURIPS2018_9e3cfc48,\n author = {Yan, Xing and Zhang, Weizhong and Ma, Lin and Liu, Wei and Wu, Qi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Parsimonious Quantile Regression of Financial Asset Tail Dynamics via Sequential Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9e3cfc48eccf81a0d57663e129aef3cb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9e3cfc48eccf81a0d57663e129aef3cb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9e3cfc48eccf81a0d57663e129aef3cb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9e3cfc48eccf81a0d57663e129aef3cb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9e3cfc48eccf81a0d57663e129aef3cb-Reviews.html", "metareview": "", "pdf_size": 368593, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=806805766318539414&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Department of SEEM, The Chinese University of Hong Kong; Tencent AI Lab; Tencent AI Lab; Tencent AI Lab; School of Data Science, City University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;gmail.com;gmail.com;columbia.edu;cityu.edu.hk", "email": "se.cuhk.edu.hk;gmail.com;gmail.com;columbia.edu;cityu.edu.hk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9e3cfc48eccf81a0d57663e129aef3cb-Abstract.html", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Chinese University of Hong Kong;Tencent;City University of Hong Kong", "aff_unique_dep": "Department of SEEM;Tencent AI Lab;School of Data Science", "aff_unique_url": "https://www.cuhk.edu.hk;https://ai.tencent.com;https://www.cityu.edu.hk", "aff_unique_abbr": "CUHK;Tencent AI Lab;CityU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Partially-Supervised Image Captioning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11200", "id": "11200", "author_site": "Peter Anderson, Stephen Gould, Mark Johnson", "author": "Peter Anderson; Stephen Gould; Mark Johnson", "abstract": "Image captioning models are becoming increasingly successful at describing the content of images in restricted domains. However, if these models are to function in the wild --- for example, as assistants for people with impaired vision --- a much larger number and variety of visual concepts must be understood. To address this problem, we teach image captioning models new visual concepts from labeled images and object detection datasets. Since image labels and object classes can be interpreted as partial captions, we formulate this problem as learning from partially-specified sequence data. We then propose a novel algorithm for training sequence models, such as recurrent neural networks, on partially-specified sequences which we represent using finite state automata. In the context of image captioning, our method lifts the restriction that previously required image captioning models to be trained on paired image-sentence corpora only, or otherwise required specialized model architectures to take advantage of alternative data modalities. Applying our approach to an existing neural captioning model, we achieve state of the art results on the novel object captioning task using the COCO dataset. 
We further show that we can train a captioning model to describe new visual concepts from the Open Images dataset while maintaining competitive COCO evaluation scores.", "bibtex": "@inproceedings{NEURIPS2018_d2ed45a5,\n author = {Anderson, Peter and Gould, Stephen and Johnson, Mark},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Partially-Supervised Image Captioning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d2ed45a52bc0edfa11c2064e9edee8bf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d2ed45a52bc0edfa11c2064e9edee8bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d2ed45a52bc0edfa11c2064e9edee8bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d2ed45a52bc0edfa11c2064e9edee8bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d2ed45a52bc0edfa11c2064e9edee8bf-Reviews.html", "metareview": "", "pdf_size": 3886875, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15350863756192758305&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Macquarie University; Australian National University; Macquarie University", "aff_domain": "mq.edu.au;anu.edu.au;mq.edu.au", "email": "mq.edu.au;anu.edu.au;mq.edu.au", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d2ed45a52bc0edfa11c2064e9edee8bf-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Macquarie University;Australian National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.macquarie.edu.au;https://www.anu.edu.au", "aff_unique_abbr": "MQ;ANU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "title": "Pelee: A Real-Time Object Detection System on Mobile Devices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11208", "id": "11208", "author_site": "Robert J. Wang, Tanner Bohn, Charles Ling", "author": "Robert J. Wang; Xiang Li; Charles X. Ling", "abstract": "An increasing need to run Convolutional Neural Network (CNN) models on mobile devices with limited computing power and memory resources encourages studies of efficient model design. A number of efficient architectures have been proposed in recent years, for example, MobileNet, ShuffleNet, and MobileNetV2. However, all these models are heavily dependent on depthwise separable convolution, which lacks an efficient implementation in most deep learning frameworks. In this study, we propose an efficient architecture named PeleeNet, which is built with conventional convolution instead. On the ImageNet ILSVRC 2012 dataset, our proposed PeleeNet achieves higher accuracy and 1.8 times faster speed than MobileNet and MobileNetV2 on NVIDIA TX2. Meanwhile, PeleeNet is only 66% of the model size of MobileNet. We then propose a real-time object detection system by combining PeleeNet with the Single Shot MultiBox Detector (SSD) method and optimizing the architecture for fast speed. Our proposed detection system, named Pelee, achieves 76.4% mAP (mean average precision) on PASCAL VOC2007 and 22.4 mAP on the MS COCO dataset at the speed of 23.6 FPS on iPhone 8 and 125 FPS on NVIDIA TX2. 
The result on COCO outperforms YOLOv2 with higher precision, 13.6 times lower computational cost, and 11.3 times smaller model size. The code and models are open sourced.", "bibtex": "@inproceedings{NEURIPS2018_9908279e,\n author = {Wang, Robert J. and Li, Xiang and Ling, Charles X.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Pelee: A Real-Time Object Detection System on Mobile Devices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9908279ebbf1f9b250ba689db6a0222b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9908279ebbf1f9b250ba689db6a0222b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9908279ebbf1f9b250ba689db6a0222b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9908279ebbf1f9b250ba689db6a0222b-Reviews.html", "metareview": "", "pdf_size": 386165, "gs_citation": 693, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6276653634539850348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Department of Computer Science, University of Western Ontario; Department of Computer Science, University of Western Ontario; Department of Computer Science, University of Western Ontario", "aff_domain": "uwo.ca;uwo.ca;uwo.ca", "email": "uwo.ca;uwo.ca;uwo.ca", "github": "https://github.com/Robert-JunWang/Pelee", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9908279ebbf1f9b250ba689db6a0222b-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Western Ontario", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uwo.ca", "aff_unique_abbr": "UWO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Persistence Fisher Kernel: A Riemannian Manifold Kernel for Persistence Diagrams", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11949", "id": "11949", "author_site": "Tam Le, Makoto Yamada", "author": "Tam Le; Makoto Yamada", "abstract": "Algebraic topology methods have recently played an important role in statistical analysis of complicated geometric structured data such as shapes, linked twist maps, and material data. Among them, \\textit{persistent homology} is a well-known tool to extract robust topological features, and its outputs are \\textit{persistence diagrams} (PDs). However, PDs are point multi-sets which cannot be used in machine learning algorithms for vector data. To deal with this, an emerging approach is to use kernel methods, and an appropriate geometry for PDs is an important factor in measuring the similarity of PDs. A popular geometry for PDs is the \\textit{Wasserstein metric}. However, the Wasserstein distance is not \\textit{negative definite}. Thus, it is difficult to build positive definite kernels upon the Wasserstein distance \\textit{without approximation}. In this work, we rely upon the alternative \\textit{Fisher information geometry} to propose a positive definite kernel for PDs \\textit{without approximation}, namely the Persistence Fisher (PF) kernel. Then, we analyze the eigensystem of the integral operator induced by the proposed kernel for kernel machines. 
Based on that, we derive generalization error bounds via covering numbers and Rademacher averages for kernel machines with the PF kernel. Additionally, we show some desirable properties, such as stability and infinite divisibility, of the proposed kernel. Furthermore, we also propose an approximation of our proposed kernel with a bounded error that can be computed in time linear in the number of points in PDs. Through experiments with many different tasks on various benchmark datasets, we illustrate that the PF kernel compares favorably with other baseline kernels for PDs.", "bibtex": "@inproceedings{NEURIPS2018_959ab9a0,\n author = {Le, Tam and Yamada, Makoto},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Persistence Fisher Kernel: A Riemannian Manifold Kernel for Persistence Diagrams},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/959ab9a0695c467e7caf75431a872e5c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/959ab9a0695c467e7caf75431a872e5c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/959ab9a0695c467e7caf75431a872e5c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/959ab9a0695c467e7caf75431a872e5c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/959ab9a0695c467e7caf75431a872e5c-Reviews.html", "metareview": "", "pdf_size": 839096, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1409702383947125765&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "RIKEN Center for Advanced Intelligence Project, Japan; Kyoto University, Japan+RIKEN Center for Advanced Intelligence Project, Japan", "aff_domain": "riken.jp;riken.jp", "email": "riken.jp;riken.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/959ab9a0695c467e7caf75431a872e5c-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "RIKEN Center for Advanced Intelligence Project;Kyoto University", "aff_unique_dep": "Center for Advanced Intelligence Project;", "aff_unique_url": "https://www.riken.jp/en/c-aip/;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "RIKEN C-AIP;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0", "aff_country_unique": "Japan" }, { "title": "Phase Retrieval Under a Generative Prior", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11871", "id": "11871", "author_site": "Paul Hand, Oscar Leong, Vlad Voroninski", "author": "Paul Hand; Oscar Leong; Vlad Voroninski", "abstract": "We introduce a novel deep-learning inspired formulation of the \\textit{phase retrieval problem}, which asks to recover a signal $y_0 \\in \\R^n$ from $m$ quadratic observations, under structural assumptions on the underlying signal. As is common in many imaging problems, previous methodologies have considered natural signals as being sparse with respect to a known basis, resulting in the decision to enforce a generic sparsity prior. 
However, these methods for phase retrieval have encountered possibly fundamental limitations, as no computationally efficient algorithm for sparse phase retrieval has been proven to succeed with fewer than $O(k^2\\log n)$ generic measurements, which is larger than the theoretical optimum of $O(k \\log n)$. In this paper, we sidestep this issue by considering a prior that a natural signal is in the range of a generative neural network $G : \\R^k \\rightarrow \\R^n$. We introduce an empirical risk formulation that has favorable global geometry for gradient methods, as soon as $m = O(k)$, under the model of a multilayer fully-connected neural network with random weights. Specifically, we show that there exists a descent direction outside of a small neighborhood around the true $k$-dimensional latent code and a negative multiple thereof. This formulation for structured phase retrieval thus benefits from two effects: generative priors can more tightly represent natural signals than sparsity priors, and this empirical risk formulation can exploit those generative priors at an information theoretically optimal sample complexity, unlike for a sparsity prior. We corroborate these results with experiments showing that exploiting generative models in phase retrieval tasks outperforms both sparse and general phase retrieval methods.", "bibtex": "@inproceedings{NEURIPS2018_1bc2029a,\n author = {Hand, Paul and Leong, Oscar and Voroninski, Vlad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Phase Retrieval Under a Generative Prior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1bc2029a8851ad344a8d503930dfd7f7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1bc2029a8851ad344a8d503930dfd7f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1bc2029a8851ad344a8d503930dfd7f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1bc2029a8851ad344a8d503930dfd7f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1bc2029a8851ad344a8d503930dfd7f7-Reviews.html", "metareview": "", "pdf_size": 2094231, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6936219906560546279&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Northeastern University; Rice University; Helm.ai", "aff_domain": "northeastern.edu;rice.edu;helm.ai", "email": "northeastern.edu;rice.edu;helm.ai", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1bc2029a8851ad344a8d503930dfd7f7-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Northeastern University;Rice University;Helm.ai", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://www.rice.edu;https://www.helm.ai", "aff_unique_abbr": "NEU;Rice;Helm.ai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Pipe-SGD: A Decentralized Pipelined SGD Framework for Distributed Deep Net Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11771", "id": "11771", "author_site": "Youjie Li, Mingchao Yu, Songze Li, Salman Avestimehr, Nam Sung Kim, Alex Schwing", "author": "Youjie Li; Mingchao 
Yu; Songze Li; Salman Avestimehr; Nam Sung Kim; Alexander Schwing", "abstract": "Distributed training of deep nets is an important technique to address some of the present day computing challenges like memory consumption and computational demands. Classical distributed approaches, synchronous or asynchronous, are based on the parameter server architecture, i.e., worker nodes compute gradients which are communicated to the parameter server while updated parameters are returned. Recently, distributed training with AllReduce operations gained popularity as well. While many of those operations seem appealing, little is reported about wall-clock training time improvements. In this paper, we carefully analyze the AllReduce based setup, propose timing models which include network latency, bandwidth, cluster size and compute time, and demonstrate that a pipelined training with a width of two combines the best of both synchronous and asynchronous training. Specifically, for a setup consisting of a four-node GPU cluster we show wall-clock time training improvements of up to 5.4x compared to conventional approaches.", "bibtex": "@inproceedings{NEURIPS2018_2c6a0bae,\n author = {Li, Youjie and Yu, Mingchao and Li, Songze and Avestimehr, Salman and Kim, Nam Sung and Schwing, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Pipe-SGD: A Decentralized Pipelined SGD Framework for Distributed Deep Net Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2c6a0bae0f071cbbf0bb3d5b11d90a82-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2c6a0bae0f071cbbf0bb3d5b11d90a82-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2c6a0bae0f071cbbf0bb3d5b11d90a82-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2c6a0bae0f071cbbf0bb3d5b11d90a82-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2c6a0bae0f071cbbf0bb3d5b11d90a82-Reviews.html", "metareview": "", "pdf_size": 647269, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14959986937235024923&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2c6a0bae0f071cbbf0bb3d5b11d90a82-Abstract.html" }, { "title": "Playing hard exploration games by watching YouTube", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11299", "id": "11299", "author_site": "Yusuf Aytar, Tobias Pfaff, David Budden, Thomas Paine, Ziyu Wang, Nando de Freitas", "author": "Yusuf Aytar; Tobias Pfaff; David Budden; Thomas Paine; Ziyu Wang; Nando de Freitas", "abstract": "Deep reinforcement learning methods traditionally struggle with tasks where environment rewards are particularly sparse. One successful method of guiding exploration in these domains is to imitate trajectories provided by a human demonstrator. However, these demonstrations are typically collected under artificial conditions, i.e. with access to the agent\u2019s exact environment setup and the demonstrator\u2019s action and reward trajectories. Here we propose a method that overcomes these limitations in two stages. 
First, we learn to map unaligned videos from multiple sources to a common representation using self-supervised objectives constructed over both time and modality (i.e. vision and sound). Second, we embed a single YouTube video in this representation to learn a reward function that encourages an agent to imitate human gameplay. This method of one-shot imitation allows our agent to convincingly exceed human-level performance on the infamously hard exploration games Montezuma\u2019s Revenge, Pitfall! and Private Eye for the first time, even if the agent is not presented with any environment rewards.", "bibtex": "@inproceedings{NEURIPS2018_35309226,\n author = {Aytar, Yusuf and Pfaff, Tobias and Budden, David and Paine, Thomas and Wang, Ziyu and de Freitas, Nando},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Playing hard exploration games by watching YouTube},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/35309226eb45ec366ca86a4329a2b7c3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/35309226eb45ec366ca86a4329a2b7c3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/35309226eb45ec366ca86a4329a2b7c3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/35309226eb45ec366ca86a4329a2b7c3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/35309226eb45ec366ca86a4329a2b7c3-Reviews.html", "metareview": "", "pdf_size": 2533725, "gs_citation": 329, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2877169605153495246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK; DeepMind, London, UK", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/35309226eb45ec366ca86a4329a2b7c3-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Plug-in Estimation in High-Dimensional Linear Inverse Problems: A Rigorous Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11716", "id": "11716", "author_site": "Alyson Fletcher, Parthe Pandit, Sundeep Rangan, Subrata Sarkar, Philip Schniter", "author": "Alyson K. Fletcher; Parthe Pandit; Sundeep Rangan; Subrata Sarkar; Philip Schniter", "abstract": "Estimating a vector $\\mathbf{x}$ from noisy linear measurements $\\mathbf{Ax+w}$ often requires use of prior knowledge or structural constraints\non $\\mathbf{x}$ for accurate reconstruction. Several recent works have considered combining linear least-squares estimation with a generic or plug-in ``denoiser\" function that can be designed in a modular manner based on the prior knowledge about $\\mathbf{x}$. 
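The second stage's reward can be sketched, under assumed details, as checkpoint matching in embedding space: embed the demo video once, then reward the agent for reaching successive checkpoints. The embedding `phi`, the checkpoint spacing, and the threshold are stand-ins for the learned self-supervised representation.

```python
import numpy as np

def phi(obs):
    # placeholder for the self-supervised embedding network
    return obs / (np.linalg.norm(obs) + 1e-8)

class CheckpointReward:
    def __init__(self, demo_frames, every=16, threshold=0.5):
        self.checkpoints = [phi(f) for f in demo_frames[::every]]
        self.next = 0
        self.threshold = threshold

    def __call__(self, obs):
        if self.next >= len(self.checkpoints):
            return 0.0
        d = np.linalg.norm(phi(obs) - self.checkpoints[self.next])
        if d < self.threshold:
            self.next += 1
            return 1.0     # sparse imitation reward replaces env reward
        return 0.0

demo = [np.random.randn(64) for _ in range(256)]
reward_fn = CheckpointReward(demo)
print(reward_fn(demo[0]))   # hitting the first checkpoint yields reward 1.0
```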
While these methods have shown excellent performance, it has been difficult to obtain rigorous performance guarantees. This work considers plug-in denoising combined with the recently-developed Vector Approximate Message Passing (VAMP) algorithm, which is itself derived via Expectation Propagation techniques. It is shown that the mean squared error of this ``plug-in\" VAMP can be exactly predicted for a large class of high-dimensional random matrices $\\mathbf{A}$ and denoisers. The method is illustrated in image reconstruction and parametric bilinear estimation.", "bibtex": "@inproceedings{NEURIPS2018_2ad9e5e9,\n author = {Fletcher, Alyson K and Pandit, Parthe and Rangan, Sundeep and Sarkar, Subrata and Schniter, Philip},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Plug-in Estimation in High-Dimensional Linear Inverse Problems: A Rigorous Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2ad9e5e943e43cad612a7996c12a8796-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2ad9e5e943e43cad612a7996c12a8796-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2ad9e5e943e43cad612a7996c12a8796-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2ad9e5e943e43cad612a7996c12a8796-Reviews.html", "metareview": "", "pdf_size": 219254, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13473932211593700080&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Dept. Statistics, UC Los Angeles; Dept. ECE, UC Los Angeles; Dept. ECE, NYU; Dept. ECE, The Ohio State Univ.; Dept. ECE, The Ohio State Univ.", "aff_domain": "ucla.edu;ucla.edu;nyu.edu;osu.edu;osu.edu", "email": "ucla.edu;ucla.edu;nyu.edu;osu.edu;osu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2ad9e5e943e43cad612a7996c12a8796-Abstract.html", "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "University of California, Los Angeles;New York University;Ohio State University", "aff_unique_dep": "Department of Statistics;Department of Electrical and Computer Engineering;Dept. of Electrical and Computer Engineering", "aff_unique_url": "https://www.ucla.edu;https://www.nyu.edu;https://www.osu.edu", "aff_unique_abbr": "UCLA;NYU;OSU", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Los Angeles;New York;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Point process latent variable models of larval zebrafish behavior", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12032", "id": "12032", "author_site": "Anuj Sharma, Robert Johnson, Florian Engert, Scott Linderman", "author": "Anuj Sharma; Robert Johnson; Florian Engert; Scott Linderman", "abstract": "A fundamental goal of systems neuroscience is to understand how neural activity gives rise to natural behavior. In order to achieve this goal, we must first build comprehensive models that offer quantitative descriptions of behavior. We develop a new class of probabilistic models to tackle this challenge in the study of larval zebrafish, an important model organism for neuroscience. 
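The plug-in idea above can be sketched with a simple alternating scheme (a plug-and-play variant, not the paper's exact VAMP recursion, which additionally tracks variances and Onsager-style correction terms that make its MSE exactly predictable):

```python
import numpy as np

rng = np.random.default_rng(1)
n, m = 200, 120
A = rng.normal(size=(m, n)) / np.sqrt(m)
x_true = rng.normal(size=n) * (rng.random(n) < 0.1)   # sparse signal
y = A @ x_true + 0.01 * rng.normal(size=m)            # y = A x + w

def denoise(v, lam=0.05):
    # modular plug-in denoiser: soft-thresholding suits a sparse prior
    return np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)

x, gamma = np.zeros(n), 1.0
M = np.linalg.inv(A.T @ A + gamma * np.eye(n))
for _ in range(50):
    z = M @ (A.T @ y + gamma * x)     # linear least-squares step
    x = denoise(z)                    # generic denoising step
print("MSE:", np.mean((x - x_true) ** 2))
```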
Larval zebrafish locomote via sequences of punctate swim bouts--brief flicks of the tail--which are naturally modeled as a marked point process. However, these sequences of swim bouts belie a set of discrete and continuous internal states, latent variables that are not captured by standard point process models. We incorporate these variables as latent marks of a point process and explore various models for their dynamics. To infer the latent variables and fit the parameters of this model, we develop an amortized variational inference algorithm that targets the collapsed posterior distribution, analytically marginalizing out the discrete latent variables. With a dataset of over 120,000 swim bouts, we show that our models reveal interpretable discrete classes of swim bouts and continuous internal states like hunger that modulate their dynamics. These models are a major step toward understanding the natural behavioral program of the larval zebrafish and, ultimately, its neural underpinnings.", "bibtex": "@inproceedings{NEURIPS2018_e02af582,\n author = {Sharma, Anuj and Johnson, Robert and Engert, Florian and Linderman, Scott},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Point process latent variable models of larval zebrafish behavior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e02af5824e1eb6ad58d6bc03ac9e827f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e02af5824e1eb6ad58d6bc03ac9e827f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e02af5824e1eb6ad58d6bc03ac9e827f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e02af5824e1eb6ad58d6bc03ac9e827f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e02af5824e1eb6ad58d6bc03ac9e827f-Reviews.html", "metareview": "", "pdf_size": 7127722, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17088009997976169622&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Columbia University; Harvard University; Harvard University; Columbia University", "aff_domain": "columbia.edu;harvard.edu;harvard.edu;columbia.edu", "email": "columbia.edu;harvard.edu;harvard.edu;columbia.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e02af5824e1eb6ad58d6bc03ac9e827f-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Columbia University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.columbia.edu;https://www.harvard.edu", "aff_unique_abbr": "Columbia;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "PointCNN: Convolution On X-Transformed Points", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11104", "id": "11104", "author_site": "Yangyan Li, Rui Bu, Mingchao Sun, Wei Wu, Xinhan Di, Baoquan Chen", "author": "Yangyan Li; Rui Bu; Mingchao Sun; Wei Wu; Xinhan Di; Baoquan Chen", "abstract": "We present a simple and general framework for feature learning from point clouds. The key to the success of CNNs is the convolution operator that is capable of leveraging spatially-local correlation in data represented densely in grids (e.g. 
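A toy generator for such a marked point process with a discrete latent state, all parameters assumed for illustration:

```python
import numpy as np

rng = np.random.default_rng(2)
rates = {0: 2.0, 1: 6.0}        # bouts/sec in "sated" vs "hungry"-like states
P = np.array([[0.95, 0.05],     # latent-state transitions between bouts
              [0.10, 0.90]])

state, t, events = 0, 0.0, []
while t < 60.0:
    t += rng.exponential(1.0 / rates[state])        # waiting time to next bout
    mark = rng.choice(3, p=[0.6, 0.3, 0.1] if state == 0
                         else [0.2, 0.3, 0.5])      # bout-type mark
    events.append((round(t, 2), state, mark))
    state = rng.choice(2, p=P[state])

print(len(events), "swim bouts in 60 s; first three:", events[:3])
```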
images). However, point clouds are irregular and unordered, so directly convolving kernels against the features associated with the points would discard shape information and be sensitive to point ordering. To address these problems, we propose to learn an X-transformation from the input points, which is used to simultaneously weight the input features associated with the points and permute them into a latent, potentially canonical order. The element-wise product and sum operations of the typical convolution operator are then applied on the X-transformed features. The proposed method is a generalization of typical CNNs to feature learning from point clouds, and we thus call it PointCNN. Experiments show that PointCNN achieves performance on par with or better than state-of-the-art methods on multiple challenging benchmark datasets and tasks.", "bibtex": "@inproceedings{NEURIPS2018_f5f8590c,\n author = {Li, Yangyan and Bu, Rui and Sun, Mingchao and Wu, Wei and Di, Xinhan and Chen, Baoquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PointCNN: Convolution On X-Transformed Points},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f5f8590cd58a54e94377e6ae2eded4d9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f5f8590cd58a54e94377e6ae2eded4d9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f5f8590cd58a54e94377e6ae2eded4d9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f5f8590cd58a54e94377e6ae2eded4d9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f5f8590cd58a54e94377e6ae2eded4d9-Reviews.html", "metareview": "", "pdf_size": 1666907, "gs_citation": 3472, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9461711858418183791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Shandong University; Shandong University; Shandong University; Shandong University; Huawei Inc.; Peking University", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f5f8590cd58a54e94377e6ae2eded4d9-Abstract.html", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Shandong University;Huawei;Peking University", "aff_unique_dep": ";Huawei;", "aff_unique_url": "http://www.sdu.edu.cn;https://www.huawei.com;http://www.pku.edu.cn", "aff_unique_abbr": "SDU;Huawei;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Poison Frogs! Targeted Clean-Label Poisoning Attacks on Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11592", "id": "11592", "author_site": "Ali Shafahi, W. Ronny Huang, Mahyar Najibi, Octavian Suciu, Christoph Studer, Tudor Dumitras, Tom Goldstein", "author": "Ali Shafahi; W. Ronny Huang; Mahyar Najibi; Octavian Suciu; Christoph Studer; Tudor Dumitras; Tom Goldstein", "abstract": "Data poisoning is an attack on machine learning models wherein the attacker adds examples to the training set to manipulate the behavior of the model at test time. This paper explores poisoning attacks on neural nets. 
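A schematic X-Conv step for a single representative point, following the PointCNN description above; the tiny random "MLP" stands in for the learned transformation, and all sizes are assumptions:

```python
import numpy as np

rng = np.random.default_rng(3)
K, C_in, C_out = 8, 4, 16
W_mlp = rng.normal(size=(K * K, K * 3)) * 0.1      # maps coords -> X (K x K)
W_conv = rng.normal(size=(C_out, K * C_in)) * 0.1  # the "typical" convolution

def x_conv(neighbor_xyz, neighbor_feats):
    # neighbor_xyz: (K, 3) local coordinates; neighbor_feats: (K, C_in)
    X = (W_mlp @ neighbor_xyz.reshape(-1)).reshape(K, K)
    transformed = X @ neighbor_feats          # weight + permute the features
    return W_conv @ transformed.reshape(-1)   # element-wise product and sum

out = x_conv(rng.normal(size=(K, 3)), rng.normal(size=(K, C_in)))
print(out.shape)   # (16,) output feature for the representative point
```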
The proposed attacks use ``clean-labels''; they don't require the attacker to have any control over the labeling of training data. They are also targeted; they control the behavior of the classifier on a specific test instance without degrading overall classifier performance. For example, an attacker could add a seemingly innocuous image (that is properly labeled) to a training set for a face recognition engine, and control the identity of a chosen person at test time. Because the attacker does not need to control the labeling function, poisons could be entered into the training set simply by putting them online and waiting for them to be scraped by a data collection bot.", "bibtex": "@inproceedings{NEURIPS2018_22722a34,\n author = {Shafahi, Ali and Huang, W. Ronny and Najibi, Mahyar and Suciu, Octavian and Studer, Christoph and Dumitras, Tudor and Goldstein, Tom},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Poison Frogs! Targeted Clean-Label Poisoning Attacks on Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/22722a343513ed45f14905eb07621686-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/22722a343513ed45f14905eb07621686-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/22722a343513ed45f14905eb07621686-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/22722a343513ed45f14905eb07621686-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/22722a343513ed45f14905eb07621686-Reviews.html", "metareview": "", "pdf_size": 2398057, "gs_citation": 1414, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2909175979109217787&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "University of Maryland; University of Maryland; University of Maryland; University of Maryland; Cornell University; University of Maryland; University of Maryland", "aff_domain": "cs.umd.edu;umd.edu;cs.umd.edu;umiacs.umd.edu;cornell.edu;umiacs.umd.edu;cs.umd.edu", "email": "cs.umd.edu;umd.edu;cs.umd.edu;umiacs.umd.edu;cornell.edu;umiacs.umd.edu;cs.umd.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/22722a343513ed45f14905eb07621686-Abstract.html", "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "University of Maryland;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.cornell.edu", "aff_unique_abbr": "UMD;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Policy Optimization via Importance Sampling", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11531", "id": "11531", "author_site": "Alberto Maria Metelli, Matteo Papini, Francesco Faccio, Marcello Restelli", "author": "Alberto Maria Metelli; Matteo Papini; Francesco Faccio; Marcello Restelli", "abstract": "Policy optimization is an effective reinforcement learning approach to solve continuous control tasks. Recent achievements have shown that alternating online and offline optimization is a successful choice for efficient trajectory reuse. 
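The "feature collision" optimization behind such clean-label attacks can be sketched as follows: craft a poison that stays near a base image in input space while matching the target in feature space. The linear feature map is a placeholder for a network; beta and the step size are assumed.

```python
import numpy as np

rng = np.random.default_rng(4)
d, p = 32, 8
F = rng.normal(size=(p, d)) / np.sqrt(d)
f = lambda x: F @ x                      # placeholder feature extractor

base = rng.normal(size=d)                # properly-labeled base image
target = rng.normal(size=d)              # test instance to control
beta, lr = 0.1, 0.1

# minimize ||f(x) - f(target)||^2 + beta * ||x - base||^2
x = base.copy()
for _ in range(300):
    grad = 2 * F.T @ (f(x) - f(target)) + 2 * beta * (x - base)
    x -= lr * grad

print("feature gap:", np.linalg.norm(f(x) - f(target)))
print("input gap  :", np.linalg.norm(x - base))
```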
However, deciding when to stop optimizing and collect new trajectories is non-trivial, as it requires accounting for the variance of the objective function estimate. In this paper, we propose a novel, model-free, policy search algorithm, POIS, applicable in both action-based and parameter-based settings. We first derive a high-confidence bound for importance sampling estimation; then we define a surrogate objective function, which is optimized offline whenever a new batch of trajectories is collected. Finally, the algorithm is tested on a selection of continuous control tasks, with both linear and deep policies, and compared with state-of-the-art policy optimization methods.", "bibtex": "@inproceedings{NEURIPS2018_6aed000a,\n author = {Metelli, Alberto Maria and Papini, Matteo and Faccio, Francesco and Restelli, Marcello},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Policy Optimization via Importance Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6aed000af86a084f9cb0264161e29dd3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6aed000af86a084f9cb0264161e29dd3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6aed000af86a084f9cb0264161e29dd3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6aed000af86a084f9cb0264161e29dd3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6aed000af86a084f9cb0264161e29dd3-Reviews.html", "metareview": "", "pdf_size": 691504, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16130728419946747088&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Politecnico di Milano, Milan, Italy; Politecnico di Milano, Milan, Italy; Politecnico di Milano, Milan, Italy + IDSIA, USI-SUPSI, Lugano, Switzerland; Politecnico di Milano, Milan, Italy", "aff_domain": "polimi.it;polimi.it;mail.polimi.it;polimi.it", "email": "polimi.it;polimi.it;mail.polimi.it;polimi.it", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6aed000af86a084f9cb0264161e29dd3-Abstract.html", "aff_unique_index": "0;0;0+1;0", "aff_unique_norm": "Politecnico di Milano;IDSIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.polimi.it;https://www.idsia.ch", "aff_unique_abbr": "Polimi;IDSIA", "aff_campus_unique_index": "0;0;0+1;0", "aff_campus_unique": "Milan;Lugano", "aff_country_unique_index": "0;0;0+1;0", "aff_country_unique": "Italy;Switzerland" }, { "title": "Policy Regret in Repeated Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11650", "id": "11650", "author_site": "Raman Arora, Michael Dinitz, Teodor Vanislavov Marinov, Mehryar Mohri", "author": "Raman Arora; Michael Dinitz; Teodor Vanislavov Marinov; Mehryar Mohri", "abstract": "The notion of", "bibtex": "@inproceedings{NEURIPS2018_8643c8e2,\n author = {Arora, Raman and Dinitz, Michael and Marinov, Teodor Vanislavov and Mohri, Mehryar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
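The importance-sampling building block of POIS can be sketched as follows; the paper's surrogate additionally penalizes estimator variance via a Renyi-divergence-based high-confidence bound, which is abstracted here into an effective-sample-size readout. The toy Gaussian policies and reward are assumptions.

```python
import numpy as np

rng = np.random.default_rng(5)

def logp(actions, mean, std=1.0):
    # log-density of a diagonal Gaussian policy over a trajectory
    return np.sum(-0.5 * ((actions - mean) / std) ** 2
                  - np.log(std * np.sqrt(2 * np.pi)), axis=1)

T, N = 10, 1000
acts = rng.normal(loc=0.0, size=(N, T))            # behavioral policy, mean 0
returns = -np.sum((acts - 0.3) ** 2, axis=1)       # toy per-trajectory return

def is_estimate(candidate_mean):
    w = np.exp(logp(acts, candidate_mean) - logp(acts, 0.0))
    ess = w.sum() ** 2 / np.sum(w ** 2)            # effective sample size
    return np.sum(w * returns) / w.sum(), ess

for mu in (0.0, 0.2, 0.5):
    est, ess = is_estimate(mu)
    print(f"candidate mean {mu}: J_hat={est:.2f}, ESS={ess:.0f}")
```

Candidates far from the behavioral policy keep fewer effective samples, which is exactly the variance effect the surrogate must account for.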
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Policy Regret in Repeated Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8643c8e2107ba86c47371e037059c4b7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8643c8e2107ba86c47371e037059c4b7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8643c8e2107ba86c47371e037059c4b7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8643c8e2107ba86c47371e037059c4b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8643c8e2107ba86c47371e037059c4b7-Reviews.html", "metareview": "", "pdf_size": 828568, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15056470449437308816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Dept. of Computer Science, Johns Hopkins University; Dept. of Computer Science, Johns Hopkins University; Dept. of Computer Science, Johns Hopkins University; Courant Institute and Google Research", "aff_domain": "cs.jhu.edu;cs.jhu.edu;jhu.edu;cims.nyu.edu", "email": "cs.jhu.edu;cs.jhu.edu;jhu.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8643c8e2107ba86c47371e037059c4b7-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Johns Hopkins University;Courant Institute", "aff_unique_dep": "Dept. of Computer Science;Courant Institute", "aff_unique_url": "https://www.jhu.edu;https://courant.nyu.edu", "aff_unique_abbr": "JHU;Courant", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Policy-Conditioned Uncertainty Sets for Robust Markov Decision Processes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11853", "id": "11853", "author_site": "Andrea Tirinzoni, Marek Petrik, Xiangli Chen, Brian Ziebart", "author": "Andrea Tirinzoni; Marek Petrik; Xiangli Chen; Brian Ziebart", "abstract": "What policy should be employed in a Markov decision process with uncertain parameters? The robust optimization answer to this question is to use rectangular uncertainty sets, which independently reflect available knowledge about each state, and then to obtain a decision policy that maximizes expected reward for the worst-case decision process parameters from these uncertainty sets. While this rectangularity is convenient computationally and leads to tractable solutions, it often produces policies that are too conservative in practice, and does not facilitate knowledge transfer between portions of the state space or across related decision processes. In this work, we propose non-rectangular uncertainty sets that bound marginal moments of state-action features defined over entire trajectories through a decision process. This enables generalization to different portions of the state space while retaining appropriate uncertainty of the decision process. We develop algorithms for solving the resulting robust decision problems, which reduce to finding an optimal policy for a mixture of decision processes, and demonstrate the benefits of our approach experimentally.", "bibtex": "@inproceedings{NEURIPS2018_7ec0dbee,\n author = {Tirinzoni, Andrea and Petrik, Marek and Chen, Xiangli and Ziebart, Brian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
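For contrast with the non-rectangular sets proposed above, a sketch of the rectangular baseline: robust value iteration where nature independently perturbs each state-action's transition vector within an L1 ball. The one-shot mass shift below is a simplified worst-case response, and all sizes are assumed.

```python
import numpy as np

rng = np.random.default_rng(6)
S, A, gamma, eps = 4, 2, 0.9, 0.2
P = rng.dirichlet(np.ones(S), size=(S, A))   # nominal transition model
R = rng.random((S, A))

def worst_case(p, V, budget):
    # simplified adversary: move up to budget/2 of probability mass
    # from the highest-value state to the lowest-value state
    q = p.copy()
    worst, best = np.argmin(V), np.argmax(V)
    shift = min(budget / 2, q[best])
    q[best] -= shift
    q[worst] += shift
    return q

V = np.zeros(S)
for _ in range(200):
    Q = np.array([[R[s, a] + gamma * worst_case(P[s, a], V, eps) @ V
                   for a in range(A)] for s in range(S)])
    V = Q.max(axis=1)
print("rectangular robust values:", np.round(V, 3))
```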
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Policy-Conditioned Uncertainty Sets for Robust Markov Decision Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7ec0dbeee45813422897e04ad8424a5e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7ec0dbeee45813422897e04ad8424a5e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7ec0dbeee45813422897e04ad8424a5e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7ec0dbeee45813422897e04ad8424a5e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7ec0dbeee45813422897e04ad8424a5e-Reviews.html", "metareview": "", "pdf_size": 463001, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7217550747113619280&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Politecnico di Milano; Amazon Robotics; University of New Hampshire; University of Illinois at Chicago", "aff_domain": "polimi.it;amazon.com;cs.unh.edu;uic.edu", "email": "polimi.it;amazon.com;cs.unh.edu;uic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7ec0dbeee45813422897e04ad8424a5e-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Politecnico di Milano;Amazon;University of New Hampshire;University of Illinois at Chicago", "aff_unique_dep": ";Amazon Robotics;;", "aff_unique_url": "https://www.polimi.it;https://www.amazonrobotics.com;https://www.unh.edu;https://www.uic.edu", "aff_unique_abbr": "Polimi;Amazon Robotics;UNH;UIC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Italy;United States" }, { "title": "Porcupine Neural Networks: Approximating Neural Network Landscapes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11474", "id": "11474", "author_site": "Soheil Feizi, Hamid Javadi, Jesse Zhang, David Tse", "author": "Soheil Feizi; Hamid Javadi; Jesse Zhang; David Tse", "abstract": "Neural networks have been used prominently in several machine learning and statistics applications. In general, the underlying optimization of neural networks is non-convex which makes analyzing their performance challenging. In this paper, we take another approach to this problem by constraining the network such that the corresponding optimization landscape has good theoretical properties without significantly compromising performance. In particular, for two-layer neural networks we introduce Porcupine Neural Networks (PNNs) whose weight vectors are constrained to lie over a finite set of lines. We show that most local optima of PNN optimizations are global while we have a characterization of regions where bad local optimizers may exist. Moreover, our theoretical and empirical results suggest that an unconstrained neural network can be approximated using a polynomially-large PNN.", "bibtex": "@inproceedings{NEURIPS2018_b6cda17a,\n author = {Feizi, Soheil and Javadi, Hamid and Zhang, Jesse and Tse, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
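The Porcupine constraint (hidden-weight vectors restricted to a finite set of lines) can be imposed by projection; the projected-gradient procedure below is an assumed illustration, not necessarily the paper's training method.

```python
import numpy as np

rng = np.random.default_rng(7)
d, hidden, n_lines = 10, 6, 3
lines = rng.normal(size=(n_lines, d))
lines /= np.linalg.norm(lines, axis=1, keepdims=True)  # unit directions
assign = rng.integers(n_lines, size=hidden)            # fixed line per neuron

def project(W):
    # keep only each row's component along its assigned line
    out = np.empty_like(W)
    for j in range(W.shape[0]):
        u = lines[assign[j]]
        out[j] = (W[j] @ u) * u
    return out

W = project(rng.normal(size=(hidden, d)))
grad = rng.normal(size=W.shape)               # stand-in gradient
W = project(W - 0.1 * grad)                   # projected gradient step
aligned = all(np.isclose(abs(W[j] @ lines[assign[j]]), np.linalg.norm(W[j]))
              for j in range(hidden))
print("all rows lie on their assigned lines:", aligned)
```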
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Porcupine Neural Networks: Approximating Neural Network Landscapes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b6cda17abb967ed28ec9610137aa45f7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b6cda17abb967ed28ec9610137aa45f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b6cda17abb967ed28ec9610137aa45f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b6cda17abb967ed28ec9610137aa45f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b6cda17abb967ed28ec9610137aa45f7-Reviews.html", "metareview": "", "pdf_size": 557589, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15256638883416906014&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science, University of Maryland, College Park; Department of Electrical and Computer Engineering, Rice University; Department of Electrical Engineering, Stanford University; Department of Electrical Engineering, Stanford University", "aff_domain": "cs.umd.edu;rice.edu;stanford.edu;stanford.edu", "email": "cs.umd.edu;rice.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b6cda17abb967ed28ec9610137aa45f7-Abstract.html", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Maryland, College Park;Rice University;Stanford University", "aff_unique_dep": "Department of Computer Science;Department of Electrical and Computer Engineering;Department of Electrical Engineering", "aff_unique_url": "https://www/umd.edu;https://www.rice.edu;https://www.stanford.edu", "aff_unique_abbr": "UMD;Rice;Stanford", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "College Park;;Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Post: Device Placement with Cross-Entropy Minimization and Proximal Policy Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11946", "id": "11946", "author_site": "Yuanxiang Gao, Li Chen, Baochun Li", "author": "Yuanxiang Gao; Li Chen; Baochun Li", "abstract": "Training deep neural networks requires an exorbitant amount of computation resources, including a heterogeneous mix of GPU and CPU devices. It is critical to place operations in a neural network on these devices in an optimal way, so that the training process can complete within the shortest amount of time. The state-of-the-art uses reinforcement learning to learn placement skills by repeatedly performing Monte-Carlo experiments. However, due to its equal treatment of placement samples, we argue that there remains ample room for significant improvements. In this paper, we propose a new joint learning algorithm, called Post, that integrates cross-entropy minimization and proximal policy optimization to achieve theoretically guaranteed optimal efficiency. In order to incorporate the cross-entropy method as a sampling technique, we propose to represent placements using discrete probability distributions, which allows us to estimate an optimal probability mass by maximal likelihood estimation, a powerful tool with the best possible efficiency. 
We have implemented Post in the Google Cloud platform, and our extensive experiments with several popular neural network training benchmarks have demonstrated clear evidence of superior performance: with the same amount of learning time, it leads to placements that have training times up to 63.7% shorter over the state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_92650b2e,\n author = {Gao, Yuanxiang and Chen, Li and Li, Baochun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Post: Device Placement with Cross-Entropy Minimization and Proximal Policy Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/92650b2e92217715fe312e6fa7b90d82-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/92650b2e92217715fe312e6fa7b90d82-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/92650b2e92217715fe312e6fa7b90d82-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/92650b2e92217715fe312e6fa7b90d82-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/92650b2e92217715fe312e6fa7b90d82-Reviews.html", "metareview": "", "pdf_size": 816420, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17992886404440627629&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Computer Engineering, University of Toronto + School of Information and Communication Engineering, University of Electronic Science and Technology of China; School of Computing and Informatics, University of Louisiana at Lafayette; Department of Electrical and Computer Engineering, University of Toronto", "aff_domain": "ece.utoronto.ca;louisiana.edu;ece.toronto.edu", "email": "ece.utoronto.ca;louisiana.edu;ece.toronto.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/92650b2e92217715fe312e6fa7b90d82-Abstract.html", "aff_unique_index": "0+1;2;0", "aff_unique_norm": "University of Toronto;University of Electronic Science and Technology of China;University of Louisiana at Lafayette", "aff_unique_dep": "Department of Electrical and Computer Engineering;School of Information and Communication Engineering;School of Computing and Informatics", "aff_unique_url": "https://www.utoronto.ca;https://www.uestc.edu.cn;https://www.louisiana.edu", "aff_unique_abbr": "U of T;UESTC;ULL", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Toronto;;Lafayette", "aff_country_unique_index": "0+1;2;0", "aff_country_unique": "Canada;China;United States" }, { "title": "Posterior Concentration for Sparse Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11114", "id": "11114", "author_site": "Veronika Rockova, nicholas polson", "author": "Nicholas G Polson; Veronika Ro\u010dkov\u00e1", "abstract": "We introduce Spike-and-Slab Deep Learning (SS-DL), a fully Bayesian alternative to dropout for improving generalizability of deep ReLU networks. This new type of regularization enables provable recovery of smooth input-output maps with {\\sl unknown} levels of smoothness. 
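The cross-entropy ingredient of Post can be sketched compactly: represent each op's placement as an independent categorical over devices, sample placements, keep an elite fraction, and refit by maximum likelihood. The runtime simulator below is a toy stand-in for real measurements.

```python
import numpy as np

rng = np.random.default_rng(8)
n_ops, n_devices = 12, 4
probs = np.full((n_ops, n_devices), 1.0 / n_devices)   # placement distributions

def runtime(placement):
    # toy cost: load imbalance plus cross-device communication
    loads = np.bincount(placement, minlength=n_devices)
    comm = np.sum(placement[:-1] != placement[1:])
    return loads.max() + 0.5 * comm

for _ in range(30):
    samples = np.array([[rng.choice(n_devices, p=probs[i])
                         for i in range(n_ops)] for _ in range(200)])
    costs = np.array([runtime(s) for s in samples])
    elite = samples[np.argsort(costs)[:20]]            # keep the top 10%
    for i in range(n_ops):                             # MLE refit per op
        counts = np.bincount(elite[:, i], minlength=n_devices) + 1e-3
        probs[i] = counts / counts.sum()

best = probs.argmax(axis=1)
print("placement:", best, "simulated runtime:", runtime(best))
```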
Indeed, we show that the posterior distribution concentrates at the near minimax rate for alpha-Holder smooth maps, performing as well as if we knew the smoothness level alpha ahead of time. Our result sheds light on architecture design for deep neural networks, namely the choice of depth, width and sparsity level. These network attributes typically depend on unknown smoothness in order to be optimal. We obviate this constraint with the fully Bayes construction. As an aside, we show that SS-DL does not overfit in the sense that the posterior concentrates on smaller networks with fewer (up to the optimal number of) nodes and links. Our results provide new theoretical justifications for deep ReLU networks from a Bayesian point of view.", "bibtex": "@inproceedings{NEURIPS2018_59b90e10,\n author = {Polson, Nicholas G and Ro\\v{c}kov\\'{a}, Veronika},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Posterior Concentration for Sparse Deep Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/59b90e1005a220e2ebc542eb9d950b1e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/59b90e1005a220e2ebc542eb9d950b1e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/59b90e1005a220e2ebc542eb9d950b1e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/59b90e1005a220e2ebc542eb9d950b1e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/59b90e1005a220e2ebc542eb9d950b1e-Reviews.html", "metareview": "", "pdf_size": 302989, "gs_citation": 121, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11591653098517759538&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Booth School of Business, University of Chicago; Booth School of Business, University of Chicago", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/59b90e1005a220e2ebc542eb9d950b1e-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "Booth School of Business", "aff_unique_url": "https://www.chicagobooth.edu", "aff_unique_abbr": "Chicago Booth", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Power-law efficient neural codes provide general link between perceptual bias and discriminability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11496", "id": "11496", "author_site": "Michael J Morais, Jonathan Pillow", "author": "Michael Morais; Jonathan W Pillow", "abstract": "Recent work in theoretical neuroscience has shown that information-theoretic \"efficient\" neural codes, which allocate neural resources to maximize the mutual information between stimuli and neural responses, give rise to a lawful relationship between perceptual bias and discriminability that is observed across a wide variety of psychophysical tasks in human observers (Wei & Stocker 2017). Here we generalize these results to show that the same law arises under a much larger family of optimal neural codes, introducing a unifying framework that we call power-law efficient coding. 
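The spike-and-slab mechanics can be shown in a few lines of sampling (theta and the slab scale are assumed values): each weight is exactly zero with probability 1 - theta (the spike) and Gaussian otherwise (the slab), so prior and posterior mass can concentrate on sparse networks.

```python
import numpy as np

rng = np.random.default_rng(9)
theta, slab_scale, n_weights = 0.2, 1.0, 10_000
included = rng.random(n_weights) < theta               # spike vs slab choice
weights = np.where(included, rng.normal(0, slab_scale, n_weights), 0.0)
print("active fraction:", included.mean())             # ~theta: sparse network
```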
Specifically, we show that the same lawful relationship between bias and discriminability arises whenever Fisher information is allocated proportional to any power of the prior distribution. This family includes neural codes that are optimal for minimizing Lp error for any p, indicating that the lawful relationship observed in human psychophysical data does not require information-theoretically optimal neural codes. Furthermore, we derive the exact constant of proportionality governing the relationship between bias and discriminability for different power laws (which includes information-theoretically optimal codes, where the power is 2, and so-called discrimax codes, where power is 1/2), and different choices of optimal decoder. As a bonus, our framework provides new insights into \"anti-Bayesian\" perceptual biases, in which percepts are biased away from the center of mass of the prior. We derive an explicit formula that clarifies precisely which combinations of neural encoder and decoder can give rise to such biases.", "bibtex": "@inproceedings{NEURIPS2018_61d77652,\n author = {Morais, Michael and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Power-law efficient neural codes provide general link between perceptual bias and discriminability},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/61d77652c97ef636343742fc3dcf3ba9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/61d77652c97ef636343742fc3dcf3ba9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/61d77652c97ef636343742fc3dcf3ba9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/61d77652c97ef636343742fc3dcf3ba9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/61d77652c97ef636343742fc3dcf3ba9-Reviews.html", "metareview": "", "pdf_size": 2332363, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2084467115563277510&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Princeton Neuroscience Institute & Department of Psychology, Princeton University; Princeton Neuroscience Institute & Department of Psychology, Princeton University", "aff_domain": "princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/61d77652c97ef636343742fc3dcf3ba9-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Princeton Neuroscience Institute & Department of Psychology", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Practical Deep Stereo (PDS): Toward applications-friendly deep stereo matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11571", "id": "11571", "author_site": "Stepan Tulyakov, Anton Ivanov, Fran\u00e7ois Fleuret", "author": "Stepan Tulyakov; Anton Ivanov; Fran\u00e7ois Fleuret", "abstract": "End-to-end deep-learning networks recently demonstrated extremely good performance for stereo matching. 
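In symbols, the power-law construction reads as follows; c_q denotes the power-dependent proportionality constant whose exact form the paper derives (q = 2 corresponds to the information-theoretically optimal code, q = 1/2 to the discrimax code):

```latex
J(\theta) \propto p(\theta)^{q}, \qquad
\delta(\theta) \propto J(\theta)^{-1/2}, \qquad
b(\theta) = c_{q}\,\frac{d}{d\theta}\bigl[\delta^{2}(\theta)\bigr]
```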
However, existing networks are difficult to use for practical applications since (1) they are memory-hungry and unable to process even modest-size images, (2) they have to be fully re-trained to handle a different disparity range.", "bibtex": "@inproceedings{NEURIPS2018_ade55409,\n author = {Tulyakov, Stepan and Ivanov, Anton and Fleuret, Fran\\c{c}ois},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Practical Deep Stereo (PDS): Toward applications-friendly deep stereo matching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ade55409d1224074754035a5a937d2e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ade55409d1224074754035a5a937d2e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ade55409d1224074754035a5a937d2e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ade55409d1224074754035a5a937d2e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ade55409d1224074754035a5a937d2e0-Reviews.html", "metareview": "", "pdf_size": 449607, "gs_citation": 149, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12572631095090620233&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Space Engineering Center at \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; Space Engineering Center at \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne + Idiap Research Institute", "aff_domain": "epfl.ch;epfl.ch;idiap.ch", "email": "epfl.ch;epfl.ch;idiap.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ade55409d1224074754035a5a937d2e0-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "EPFL;Idiap Research Institute", "aff_unique_dep": "Space Engineering Center;", "aff_unique_url": "https://www.epfl.ch;https://www.idiap.ch", "aff_unique_abbr": "EPFL;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "Switzerland" }, { "title": "Practical Methods for Graph Two-Sample Testing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11307", "id": "11307", "author_site": "Debarghya Ghoshdastidar, Ulrike von Luxburg", "author": "Debarghya Ghoshdastidar; Ulrike von Luxburg", "abstract": "Hypothesis testing for graphs has been an important tool in applied research fields for more than two decades, and still remains a challenging problem as one often needs to draw inference from few replicates of large graphs. Recent studies in statistics and learning theory have provided some theoretical insights about such high-dimensional graph testing problems, but the practicality of the developed theoretical methods remains an open question.", "bibtex": "@inproceedings{NEURIPS2018_dfa92d8f,\n author = {Ghoshdastidar, Debarghya and von Luxburg, Ulrike},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Practical Methods for Graph Two-Sample Testing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dfa92d8f817e5b08fcaafb50d03763cf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dfa92d8f817e5b08fcaafb50d03763cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dfa92d8f817e5b08fcaafb50d03763cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dfa92d8f817e5b08fcaafb50d03763cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dfa92d8f817e5b08fcaafb50d03763cf-Reviews.html", "metareview": "", "pdf_size": 352477, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3213877141900838189&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, University of T\u00fcbingen + Max Planck Institute for Intelligent Systems; Department of Computer Science, University of T\u00fcbingen", "aff_domain": "informatik.uni-tuebingen.de;informatik.uni-tuebingen.de", "email": "informatik.uni-tuebingen.de;informatik.uni-tuebingen.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dfa92d8f817e5b08fcaafb50d03763cf-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "University of T\u00fcbingen;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Department of Computer Science;Intelligent Systems", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.mpi-is.mpg.de", "aff_unique_abbr": ";MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0", "aff_country_unique": "Germany" }, { "title": "Practical exact algorithm for trembling-hand equilibrium refinements in games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11493", "id": "11493", "author_site": "Gabriele Farina, Nicola Gatti, Tuomas Sandholm", "author": "Gabriele Farina; Nicola Gatti; Tuomas Sandholm", "abstract": "Nash equilibrium strategies have the known weakness that they do not prescribe rational play in situations that are reached with zero probability according to the strategies themselves, for example, if players have made mistakes. Trembling-hand refinements---such as extensive-form perfect equilibria and quasi-perfect equilibria---remedy this problem in sound ways. Despite their appeal, they have not received attention in practice since no known algorithm for computing them scales beyond toy instances. In this paper, we design an exact polynomial-time algorithm for finding trembling-hand equilibria in zero-sum extensive-form games. It is several orders of magnitude faster than the best prior ones, numerically stable, and quickly solves game instances with tens of thousands of nodes in the game tree. It enables, for the first time, the use of trembling-hand refinements in practice.", "bibtex": "@inproceedings{NEURIPS2018_fc79250f,\n author = {Farina, Gabriele and Gatti, Nicola and Sandholm, Tuomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Practical exact algorithm for trembling-hand equilibrium refinements in games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fc79250f8c5b804390e8da280b4cf06e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fc79250f8c5b804390e8da280b4cf06e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fc79250f8c5b804390e8da280b4cf06e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fc79250f8c5b804390e8da280b4cf06e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fc79250f8c5b804390e8da280b4cf06e-Reviews.html", "metareview": "", "pdf_size": 538737, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3976443965922577123&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Carnegie Mellon University; DEIB, Politecnico di Milano; Computer Science Department, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;polimi.it;cs.cmu.edu", "email": "cs.cmu.edu;polimi.it;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fc79250f8c5b804390e8da280b4cf06e-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Politecnico di Milano", "aff_unique_dep": "Computer Science Department;DEIB", "aff_unique_url": "https://www.cmu.edu;https://www.polimi.it", "aff_unique_abbr": "CMU;Politecnico di Milano", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Italy" }, { "title": "Precision and Recall for Time Series", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11204", "id": "11204", "author_site": "Nesime Tatbul, Tae Jun Lee, Stan Zdonik, Mejbah Alam, Justin Gottschlich", "author": "Nesime Tatbul; Tae Jun Lee; Stan Zdonik; Mejbah Alam; Justin Gottschlich", "abstract": "Classical anomaly detection is principally concerned with point-based anomalies, those anomalies that occur at a single point in time. Yet, many real-world anomalies are range-based, meaning they occur over a period of time. Motivated by this observation, we present a new mathematical model to evaluate the accuracy of time series classification algorithms. Our model expands the well-known Precision and Recall metrics to measure ranges, while simultaneously enabling customization support for domain-specific preferences.", "bibtex": "@inproceedings{NEURIPS2018_8f468c87,\n author = {Tatbul, Nesime and Lee, Tae Jun and Zdonik, Stan and Alam, Mejbah and Gottschlich, Justin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
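A minimal range-based recall in the spirit of the time-series precision/recall model above; the model's weighting and positional-bias customizations are omitted, leaving the simplest overlap-reward variant, assumed here for illustration.

```python
def overlap(a, b):
    # length of the intersection of two half-open ranges (start, end)
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))

def range_recall(real_ranges, pred_ranges):
    total = 0.0
    for r in real_ranges:
        length = r[1] - r[0]
        covered = sum(overlap(r, p) for p in pred_ranges)
        total += min(covered, length) / length   # fraction of range detected
    return total / len(real_ranges)

real = [(10, 20), (40, 60)]
pred = [(12, 18), (55, 70)]
print(range_recall(real, pred))   # (6/10 + 5/20) / 2 = 0.425
```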
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Precision and Recall for Time Series},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8f468c873a32bb0619eaeb2050ba45d1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8f468c873a32bb0619eaeb2050ba45d1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8f468c873a32bb0619eaeb2050ba45d1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8f468c873a32bb0619eaeb2050ba45d1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8f468c873a32bb0619eaeb2050ba45d1-Reviews.html", "metareview": "", "pdf_size": 999354, "gs_citation": 264, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8264723895869910586&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Intel Labs+MIT; Microsoft; Brown University; Intel Labs; Intel Labs", "aff_domain": "csail.mit.edu;alumni.brown.edu;cs.brown.edu;intel.com;intel.com", "email": "csail.mit.edu;alumni.brown.edu;cs.brown.edu;intel.com;intel.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8f468c873a32bb0619eaeb2050ba45d1-Abstract.html", "aff_unique_index": "0+1;2;3;0;0", "aff_unique_norm": "Intel;Massachusetts Institute of Technology;Microsoft;Brown University", "aff_unique_dep": "Intel Labs;;Microsoft Corporation;", "aff_unique_url": "https://www.intel.com;https://web.mit.edu;https://www.microsoft.com;https://www.brown.edu", "aff_unique_abbr": "Intel;MIT;Microsoft;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Predict Responsibly: Improving Fairness and Accuracy by Learning to Defer", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11596", "id": "11596", "author_site": "David Madras, Toni Pitassi, Richard Zemel", "author": "David Madras; Toni Pitassi; Richard Zemel", "abstract": "In many machine learning applications, there are multiple decision-makers involved, both automated and human. The interaction between these agents often goes unaddressed in algorithmic development. In this work, we explore a simple version of this interaction with a two-stage framework containing an automated model and an external decision-maker. The model can choose to say PASS, and pass the decision downstream, as explored in rejection learning. We extend this concept by proposing \"learning to defer\", which generalizes rejection learning by considering the effect of other agents in the decision-making process. We propose a learning algorithm which accounts for potential biases held by external decision-makers in a system. Experiments demonstrate that learning to defer can make systems not only more accurate but also less biased. Even when working with inconsistent or biased users, we show that deferring models still greatly improve the accuracy and/or fairness of the entire system.", "bibtex": "@inproceedings{NEURIPS2018_09d37c08,\n author = {Madras, David and Pitassi, Toni and Zemel, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
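The two-stage interaction in learning to defer can be sketched as a thresholded decision rule; the defer score and threshold below are assumed illustrations, not the paper's learned objective.

```python
def system_decision(p_model, defer_score, dm_prediction, tau=0.7):
    # defer to the external decision-maker when the model is unsure;
    # otherwise act on the model's own prediction
    if defer_score > tau:
        return dm_prediction, "deferred"
    return int(p_model > 0.5), "model"

p = 0.55                                   # model's predicted probability
defer_score = 1 - abs(p - 0.5) * 2         # uncertainty used as defer score
decision, who = system_decision(p, defer_score, dm_prediction=1)
print(decision, who)   # near-chance prediction -> passed downstream
```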
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Predict Responsibly: Improving Fairness and Accuracy by Learning to Defer},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/09d37c08f7b129e96277388757530c72-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/09d37c08f7b129e96277388757530c72-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/09d37c08f7b129e96277388757530c72-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/09d37c08f7b129e96277388757530c72-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/09d37c08f7b129e96277388757530c72-Reviews.html", "metareview": "", "pdf_size": 522279, "gs_citation": 298, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12417404711894813357&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Toronto; University of Toronto; University of Toronto + Vector Institute", "aff_domain": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/09d37c08f7b129e96277388757530c72-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "University of Toronto;Vector Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "Canada" }, { "title": "Predictive Approximate Bayesian Computation via Saddle Points", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11971", "id": "11971", "author_site": "Yingxiang Yang, Bo Dai, Negar Kiyavash, Niao He", "author": "Yingxiang Yang; Bo Dai; Negar Kiyavash; Niao He", "abstract": "Approximate Bayesian computation (ABC) is an important methodology for Bayesian inference when the likelihood function is intractable. Sampling-based ABC algorithms such as rejection- and K2-ABC are inefficient when the parameters are high-dimensional, while regression-based algorithms such as K- and DR-ABC are hard to scale. In this paper, we introduce an optimization-based ABC framework that addresses these deficiencies. Leveraging a generative model for posterior and joint distribution matching, we show that ABC can be framed as saddle point problems, whose objectives can be accessed directly with samples. We present the predictive ABC algorithm (P-ABC), and provide a probabilistically approximately correct (PAC) bound that guarantees its learning consistency. Numerical experiments show that P-ABC outperforms both K2- and DR-ABC significantly.", "bibtex": "@inproceedings{NEURIPS2018_a14185bf,\n author = {Yang, Yingxiang and Dai, Bo and Kiyavash, Negar and He, Niao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
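For reference, the sampling-based baseline that P-ABC is contrasted with, on a Gaussian toy model with an assumed tolerance: draw parameters from the prior, simulate, and keep parameters whose summary statistic lands near the observed one.

```python
import numpy as np

rng = np.random.default_rng(11)
obs_mean = 1.3                               # observed summary statistic
eps, n_draws = 0.05, 20_000                  # tolerance and prior draws

theta = rng.normal(0, 2, size=n_draws)       # prior samples
sim = rng.normal(theta[:, None], 1.0, size=(n_draws, 20)).mean(axis=1)
accepted = theta[np.abs(sim - obs_mean) < eps]

print(f"{len(accepted)} accepted; posterior mean ~ {accepted.mean():.2f}")
# The acceptance rate collapses as dimensions grow, which is the
# inefficiency that motivates the optimization-based framing above.
```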
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Predictive Approximate Bayesian Computation via Saddle Points},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a14185bf0c82b3369f86efb3cac5ad28-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a14185bf0c82b3369f86efb3cac5ad28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a14185bf0c82b3369f86efb3cac5ad28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a14185bf0c82b3369f86efb3cac5ad28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a14185bf0c82b3369f86efb3cac5ad28-Reviews.html", "metareview": "", "pdf_size": 418229, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10582020474498064352&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign; Google Brain; Department of Industrial and Enterprise Systems Engineering, University of Illinois at Urbana-Champaign; Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;gmail.com;illinois.edu;illinois.edu", "email": "illinois.edu;gmail.com;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a14185bf0c82b3369f86efb3cac5ad28-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google", "aff_unique_dep": "Department of Electrical and Computer Engineering;Google Brain", "aff_unique_url": "https://illinois.edu;https://brain.google.com", "aff_unique_abbr": "UIUC;Google Brain", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Predictive Uncertainty Estimation via Prior Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11679", "id": "11679", "author_site": "Andrey Malinin, Mark Gales", "author": "Andrey Malinin; Mark Gales", "abstract": "Estimating how uncertain an AI system is in its predictions is important to improve the safety of such systems. Uncertainty in predictions can result from uncertainty in model parameters, irreducible \emph{data uncertainty} and uncertainty due to distributional mismatch between the test and training data distributions. Different actions might be taken depending on the source of the uncertainty, so it is important to be able to distinguish between them. Recently, baseline tasks and metrics have been defined and several practical methods to estimate uncertainty have been developed. These methods, however, attempt to model uncertainty due to distributional mismatch either implicitly through \emph{model uncertainty} or as \emph{data uncertainty}. This work proposes a new framework for modeling predictive uncertainty called Prior Networks (PNs) which explicitly models \emph{distributional uncertainty}. PNs do this by parameterizing a prior distribution over predictive distributions. This work focuses on uncertainty for classification and evaluates PNs on the tasks of identifying out-of-distribution (OOD) samples and detecting misclassification on the MNIST and CIFAR-10 datasets, where they are found to outperform previous methods. 
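The Dirichlet parameterization of a prior over categorical predictive distributions admits a closed-form uncertainty decomposition. A minimal illustrative sketch follows (not the paper's code; it assumes the network has already produced concentration parameters alpha, and the function name is hypothetical):

```python
import numpy as np
from scipy.special import digamma

def dirichlet_uncertainty(alpha):
    """Decompose the uncertainty of a Dirichlet(alpha) over categorical
    predictive distributions: the entropy of the mean prediction equals the
    expected data uncertainty plus the distributional uncertainty (the
    mutual information between the label and the categorical parameters)."""
    alpha = np.asarray(alpha, dtype=float)
    a0 = alpha.sum()
    p_bar = alpha / a0                               # expected prediction
    total = -np.sum(p_bar * np.log(p_bar))           # entropy of the mean
    data = -np.sum(p_bar * (digamma(alpha + 1.0) - digamma(a0 + 1.0)))
    return total, data, total - data                 # last term: distributional

# A flat, low-concentration Dirichlet signals distributional uncertainty,
# while a flat, high-concentration one attributes the entropy to the data:
print(dirichlet_uncertainty([1.0, 1.0, 1.0]))        # large mutual information
print(dirichlet_uncertainty([100.0, 100.0, 100.0]))  # near-zero mutual information
```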
Experiments on synthetic and MNIST and CIFAR-10 data show that, unlike previous non-Bayesian methods, PNs are able to distinguish between data and distributional uncertainty.", "bibtex": "@inproceedings{NEURIPS2018_3ea2db50,\n author = {Malinin, Andrey and Gales, Mark},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Predictive Uncertainty Estimation via Prior Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3ea2db50e62ceefceaf70a9d9a56a6f4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3ea2db50e62ceefceaf70a9d9a56a6f4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3ea2db50e62ceefceaf70a9d9a56a6f4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3ea2db50e62ceefceaf70a9d9a56a6f4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3ea2db50e62ceefceaf70a9d9a56a6f4-Reviews.html", "metareview": "", "pdf_size": 966844, "gs_citation": 1173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11856860188602429224&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Engineering, University of Cambridge; Department of Engineering, University of Cambridge", "aff_domain": "cam.ac.uk;eng.cam.ac.uk", "email": "cam.ac.uk;eng.cam.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3ea2db50e62ceefceaf70a9d9a56a6f4-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "Department of Engineering", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Preference Based Adaptation for Learning Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11751", "id": "11751", "author_site": "Yao-Xiang Ding, Zhi-Hua Zhou", "author": "Yao-Xiang Ding; Zhi-Hua Zhou", "abstract": "In many real-world learning tasks, it is hard to directly optimize the true performance measures, while choosing the right surrogate objectives is also difficult. In this situation, it is desirable to incorporate an objective-optimization process into the learning loop, based on weak modeling of the relationship between the true measure and the objective. In this work, we discuss the task of objective adaptation, in which the learner iteratively adapts the learning objective to the underlying true objective based on the preference feedback from an oracle. We show that when the objective can be linearly parameterized, this preference-based learning problem can be solved by utilizing the dueling bandit model. A novel sampling-based algorithm, DL^2M, is proposed to learn the optimal parameter, which enjoys strong theoretical guarantees and efficient empirical performance. To avoid learning a hypothesis from scratch after each objective function update, a boosting-based hypothesis adaptation approach is proposed to efficiently adapt any pre-learned element hypothesis to the current objective. 
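As a toy illustration of this loop (emphatically not the DL^2M algorithm itself; the oracle, step size, and function names are all hypothetical), preference-driven adaptation of a linear objective parameter might look like:

```python
import numpy as np

def adapt_objective(w, oracle_prefers, rounds=200, step=0.1, seed=0):
    """Toy preference-driven adaptation of a linear objective parameter w:
    propose a random perturbation of w, ask the preference oracle which of
    the two candidates it prefers, and keep the winner."""
    rng = np.random.default_rng(seed)
    for _ in range(rounds):
        w_new = w + step * rng.standard_normal(w.shape)
        if oracle_prefers(w_new, w):  # True iff the oracle prefers w_new
            w = w_new
    return w

# Synthetic oracle that prefers parameters closer to a hidden target:
target = np.array([0.7, 0.3])
oracle = lambda a, b: np.linalg.norm(a - target) < np.linalg.norm(b - target)
print(adapt_objective(np.zeros(2), oracle))  # drifts toward the target
```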
We apply the overall approach to multi-label learning, and show that the proposed approach achieves significant performance gains under various multi-label performance measures.", "bibtex": "@inproceedings{NEURIPS2018_d4031374,\n author = {Ding, Yao-Xiang and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Preference Based Adaptation for Learning Objectives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d403137434343677b98efc88cbd5493d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d403137434343677b98efc88cbd5493d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d403137434343677b98efc88cbd5493d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d403137434343677b98efc88cbd5493d-Reviews.html", "metareview": "", "pdf_size": 231089, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5630412472473361597&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d403137434343677b98efc88cbd5493d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology", "aff_unique_url": "http://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Nanjing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Privacy Amplification by Subsampling: Tight Analyses via Couplings and Divergences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11608", "id": "11608", "author_site": "Borja Balle, Gilles Barthe, Marco Gaboardi", "author": "Borja Balle; Gilles Barthe; Marco Gaboardi", "abstract": "Differential privacy comes equipped with multiple analytical tools for the design of private data analyses. One important tool is the so-called \"privacy amplification by subsampling\" principle, which ensures that a differentially private mechanism run on a random subsample of a population provides higher privacy guarantees than when run on the entire population. Several instances of this principle have been studied for different random subsampling methods, each with an ad-hoc analysis. In this paper we present a general method that recovers and improves prior analyses, yields lower bounds and derives new instances of privacy amplification by subsampling. Our method leverages a characterization of differential privacy as a divergence which emerged in the program verification community. Furthermore, it introduces new tools, including advanced joint convexity and privacy profiles, which might be of independent interest.", "bibtex": "@inproceedings{NEURIPS2018_3b5020bb,\n author = {Balle, Borja and Barthe, Gilles and Gaboardi, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Privacy Amplification by Subsampling: Tight Analyses via Couplings and Divergences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3b5020bb891119b9f5130f1fea9bd773-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3b5020bb891119b9f5130f1fea9bd773-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3b5020bb891119b9f5130f1fea9bd773-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3b5020bb891119b9f5130f1fea9bd773-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3b5020bb891119b9f5130f1fea9bd773-Reviews.html", "metareview": "", "pdf_size": 417387, "gs_citation": 475, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12183166386654292985&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Amazon Research; IMDEA Software Institute; University at Buffalo, SUNY", "aff_domain": "amazon.co.uk;imdea.org;buffalo.edu", "email": "amazon.co.uk;imdea.org;buffalo.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3b5020bb891119b9f5130f1fea9bd773-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Amazon;IMDEA Software Institute;University at Buffalo", "aff_unique_dep": "Amazon Research;;", "aff_unique_url": "https://www.amazon.science;https://www.imdea.org/;https://www.buffalo.edu", "aff_unique_abbr": "Amazon;IMDEA;UB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Buffalo", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Spain" }, { "title": "Probabilistic Matrix Factorization for Automated Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11337", "id": "11337", "author_site": "Nicolo Fusi, Rishit Sheth, Melih Elibol", "author": "Nicolo Fusi; Rishit Sheth; Melih Elibol", "abstract": "In order to achieve state-of-the-art performance, modern machine learning techniques require careful data pre-processing and hyperparameter tuning. Moreover, given the ever-increasing number of machine learning models being developed, model selection is becoming increasingly important. Automating the selection and tuning of machine learning pipelines, which can include different data pre-processing methods and machine learning models, has long been one of the goals of the machine learning community. \nIn this paper, we propose to solve this meta-learning task by combining ideas from collaborative filtering and Bayesian optimization. Specifically, we use a probabilistic matrix factorization model to transfer knowledge across experiments performed on hundreds of different datasets and use an acquisition function to guide the exploration of the space of possible ML pipelines. In our experiments, we show that our approach quickly identifies high-performing pipelines across a wide range of datasets, significantly outperforming the current state-of-the-art.", "bibtex": "@inproceedings{NEURIPS2018_b59a51a3,\n author = {Fusi, Nicolo and Sheth, Rishit and Elibol, Melih},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probabilistic Matrix Factorization for Automated Machine Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b59a51a3c0bf9c5228fde841714f523a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b59a51a3c0bf9c5228fde841714f523a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b59a51a3c0bf9c5228fde841714f523a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b59a51a3c0bf9c5228fde841714f523a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b59a51a3c0bf9c5228fde841714f523a-Reviews.html", "metareview": "", "pdf_size": 752408, "gs_citation": 174, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6902330776298089199&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Microsoft Research, New England; Microsoft Research, New England; EECS, University of California, Berkeley + Microsoft Research, New England", "aff_domain": "microsoft.com;microsoft.com;cs.berkeley.edu", "email": "microsoft.com;microsoft.com;cs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b59a51a3c0bf9c5228fde841714f523a-Abstract.html", "aff_unique_index": "0;0;1+0", "aff_unique_norm": "Microsoft;University of California, Berkeley", "aff_unique_dep": "Microsoft Research;EECS", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/newengland;https://www.berkeley.edu", "aff_unique_abbr": "MSR;UC Berkeley", "aff_campus_unique_index": "0;0;1+0", "aff_campus_unique": "New England;Berkeley", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Probabilistic Model-Agnostic Meta-Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11905", "id": "11905", "author_site": "Chelsea Finn, Kelvin Xu, Sergey Levine", "author": "Chelsea Finn; Kelvin Xu; Sergey Levine", "abstract": "Meta-learning for few-shot learning entails acquiring a prior over previous tasks and experiences, such that new tasks can be learned from small amounts of data. However, a critical challenge in few-shot learning is task ambiguity: even when a powerful prior can be meta-learned from a large number of prior tasks, a small dataset for a new task can simply be too ambiguous to acquire a single accurate model (e.g., a classifier) for that task. In this paper, we propose a probabilistic meta-learning algorithm that can sample models for a new task from a model distribution. Our approach extends model-agnostic meta-learning, which adapts to new tasks via gradient descent, to incorporate a parameter distribution that is trained via a variational lower bound. At meta-test time, our algorithm adapts via a simple procedure that injects noise into gradient descent, and at meta-training time, the model is trained such that this stochastic adaptation procedure produces samples from the approximate model posterior. Our experimental results show that our method can sample plausible classifiers and regressors in ambiguous few-shot learning problems. We also show how reasoning about ambiguity can be used for downstream active learning problems.", "bibtex": "@inproceedings{NEURIPS2018_8e2c381d,\n author = {Finn, Chelsea and Xu, Kelvin and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probabilistic Model-Agnostic Meta-Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8e2c381d4dd04f1c55093f22c59c3a08-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8e2c381d4dd04f1c55093f22c59c3a08-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8e2c381d4dd04f1c55093f22c59c3a08-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8e2c381d4dd04f1c55093f22c59c3a08-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8e2c381d4dd04f1c55093f22c59c3a08-Reviews.html", "metareview": "", "pdf_size": 1695344, "gs_citation": 888, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7618634517687128500&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "UC Berkeley; UC Berkeley; UC Berkeley", "aff_domain": "eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "email": "eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8e2c381d4dd04f1c55093f22c59c3a08-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Probabilistic Neural Programmed Networks for Scene Generation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11400", "id": "11400", "author_site": "Zhiwei Deng, Jiacheng Chen, YIFANG FU, Greg Mori", "author": "Zhiwei Deng; Jiacheng Chen; YIFANG FU; Greg Mori", "abstract": "In this paper we address the text-to-scene image generation problem. Building generative models that capture the variability in complicated scenes containing rich semantics is a grand goal of image generation. Complicated scene images contain rich visual elements, compositional visual concepts, and complicated relations between objects. Generative models, as an analysis-by-synthesis process, should encompass the following three core components: 1) the generation process that composes the scene; 2) the primitive visual elements and how they are composed; 3) the rendering of abstract concepts into their pixel-level realizations. We propose PNP-Net, a variational auto-encoder framework that addresses these three challenges: it flexibly composes images with a dynamic network structure, learns a set of distribution transformers that can compose distributions based on semantics, and decodes samples from these distributions into realistic images.", "bibtex": "@inproceedings{NEURIPS2018_06964dce,\n author = {Deng, Zhiwei and Chen, Jiacheng and FU, YIFANG and Mori, Greg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probabilistic Neural Programmed Networks for Scene Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/06964dce9addb1c5cb5d6e3d9838f733-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/06964dce9addb1c5cb5d6e3d9838f733-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/06964dce9addb1c5cb5d6e3d9838f733-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/06964dce9addb1c5cb5d6e3d9838f733-Reviews.html", "metareview": "", "pdf_size": 799362, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7658453227892507452&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Simon Fraser University; Simon Fraser University; Simon Fraser University; Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca;sfu.ca;cs.sfu.ca", "email": "sfu.ca;sfu.ca;sfu.ca;cs.sfu.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/06964dce9addb1c5cb5d6e3d9838f733-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Processing of missing data by neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11279", "id": "11279", "author_site": "Marek \u015amieja, \u0141ukasz Struski, Jacek Tabor, Bartosz Zieli\u0144ski, Przemys\u0142aw Spurek", "author": "Marek \u015amieja; \u0141ukasz Struski; Jacek Tabor; Bartosz Zieli\u0144ski; Przemys\u0142aw Spurek", "abstract": "We propose a general, theoretically justified mechanism for processing missing data by neural networks. Our idea is to replace a typical neuron's response in the first hidden layer with its expected value. This approach can be applied to various types of networks with minimal modification. Moreover, in contrast to recent approaches, it does not require complete data for training. Experimental results performed on different types of architectures show that our method gives better results than typical imputation strategies and other methods dedicated to incomplete data.", "bibtex": "@inproceedings{NEURIPS2018_411ae1bf,\n author = {\'{S}mieja, Marek and Struski, \L ukasz and Tabor, Jacek and Zieli\'{n}ski, Bartosz and Spurek, Przemys\l aw},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Processing of missing data by neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/411ae1bf081d1674ca6091f8c59a266f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/411ae1bf081d1674ca6091f8c59a266f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/411ae1bf081d1674ca6091f8c59a266f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/411ae1bf081d1674ca6091f8c59a266f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/411ae1bf081d1674ca6091f8c59a266f-Reviews.html", "metareview": "", "pdf_size": 403369, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8626650856385111699&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Faculty of Mathematics and Computer Science, Jagiellonian University; Faculty of Mathematics and Computer Science, Jagiellonian University; Faculty of Mathematics and Computer Science, Jagiellonian University; Faculty of Mathematics and Computer Science, Jagiellonian University; Faculty of Mathematics and Computer Science, Jagiellonian University", "aff_domain": "uj.edu.pl;uj.edu.pl;uj.edu.pl;uj.edu.pl;uj.edu.pl", "email": "uj.edu.pl;uj.edu.pl;uj.edu.pl;uj.edu.pl;uj.edu.pl", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/411ae1bf081d1674ca6091f8c59a266f-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Jagiellonian University", "aff_unique_dep": "Faculty of Mathematics and Computer Science", "aff_unique_url": "https://www.uj.edu.pl", "aff_unique_abbr": "UJ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Poland" }, { "title": "Provable Gaussian Embedding with One Observation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11653", "id": "11653", "author_site": "Ming Yu, Zhuoran Yang, Tuo Zhao, Mladen Kolar, Zhaoran Wang", "author": "Ming Yu; Zhuoran Yang; Tuo Zhao; Mladen Kolar; Zhaoran Wang", "abstract": "The success of machine learning methods heavily relies on having an appropriate representation for data at hand. Traditionally, machine learning approaches relied on user-defined heuristics to extract features encoding structural information about data. However, recently there has been a surge in approaches that learn how to encode the data automatically in a low-dimensional space. Exponential family embedding provides a probabilistic framework for learning low-dimensional representations for various types of high-dimensional data. Though successful in practice, theoretical underpinnings for exponential family embeddings have not been established. In this paper, we study the Gaussian embedding model and develop the first theoretical results for exponential family embedding models. First, we show that, under a mild condition, the embedding structure can be learned from one observation by leveraging the parameter sharing between different contexts even though the data are dependent on each other. Second, we study properties of two algorithms used for learning the embedding structure and establish convergence results for each of them. The first algorithm is based on a convex relaxation, while the other solves the non-convex formulation of the problem directly. 
Experiments demonstrate the effectiveness of our approach.", "bibtex": "@inproceedings{NEURIPS2018_32fdab65,\n author = {Yu, Ming and Yang, Zhuoran and Zhao, Tuo and Kolar, Mladen and Wang, Zhaoran},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Provable Gaussian Embedding with One Observation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/32fdab6559cdfa4f167f8c31b9199643-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/32fdab6559cdfa4f167f8c31b9199643-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/32fdab6559cdfa4f167f8c31b9199643-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/32fdab6559cdfa4f167f8c31b9199643-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/32fdab6559cdfa4f167f8c31b9199643-Reviews.html", "metareview": "", "pdf_size": 563116, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10610719123375539098&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Booth School of Business, University of Chicago, Chicago, IL; Department of Operations Research and Financial Engineering, Princeton University, Princeton, NJ; School of Industrial and Systems Engineering, Georgia Institute of Technology, Atlanta, GA; Booth School of Business, University of Chicago, Chicago, IL; Department of Industrial Engineering and Management Sciences, Northwestern University, Evanston, IL", "aff_domain": "uchicago.edu; ; ; ; ", "email": "uchicago.edu; ; ; ; ", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/32fdab6559cdfa4f167f8c31b9199643-Abstract.html", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University of Chicago;Princeton University;Georgia Institute of Technology;Northwestern University", "aff_unique_dep": "Booth School of Business;Department of Operations Research and Financial Engineering;School of Industrial and Systems Engineering;Department of Industrial Engineering and Management Sciences", "aff_unique_url": "https://www.chicagobooth.edu;https://www.princeton.edu;https://www.gatech.edu;https://www.northwestern.edu", "aff_unique_abbr": "UChicago;Princeton;Georgia Tech;NU", "aff_campus_unique_index": "0;1;2;0;3", "aff_campus_unique": "Chicago;Princeton;Atlanta;Evanston", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Provable Variational Inference for Constrained Log-Submodular Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11277", "id": "11277", "author_site": "Josip Djolonga, Stefanie Jegelka, Andreas Krause", "author": "Josip Djolonga; Stefanie Jegelka; Andreas Krause", "abstract": "Submodular maximization problems appear in several areas of machine learning and data science, as many useful modelling concepts such as diversity and coverage satisfy this natural diminishing returns property. Because the data defining these functions, as well as the decisions made with the computed solutions, are subject to statistical noise and randomness, it is arguably necessary to go beyond computing a single approximate optimum and quantify its inherent uncertainty. 
To this end, we define a rich class of probabilistic models associated with constrained submodular maximization problems. These capture log-submodular dependencies of arbitrary order between the variables, but also satisfy hard combinatorial constraints. Namely, the variables are assumed to take on one of a \u2014 possibly exponentially large \u2014 set of states, which form the bases of a matroid. To perform inference in these models, we design novel variational inference algorithms, which carefully leverage the combinatorial and probabilistic properties of these objects. In addition to providing completely tractable and well-understood variational approximations, our approach results in the minimization of a convex upper bound on the log-partition function. The bound can be efficiently evaluated using greedy algorithms and optimized using any first-order method. Moreover, for the case of facility location and weighted coverage functions, we prove the first constant factor guarantee in this setting \u2014 an efficiently certifiable e/(e-1) approximation of the log-partition function. Finally, we empirically demonstrate the effectiveness of our approach on several instances.", "bibtex": "@inproceedings{NEURIPS2018_0c0a7566,\n author = {Djolonga, Josip and Jegelka, Stefanie and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Provable Variational Inference for Constrained Log-Submodular Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0c0a7566915f4f24853fc4192689aa7e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0c0a7566915f4f24853fc4192689aa7e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0c0a7566915f4f24853fc4192689aa7e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0c0a7566915f4f24853fc4192689aa7e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0c0a7566915f4f24853fc4192689aa7e-Reviews.html", "metareview": "", "pdf_size": 1800380, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17493477326831327496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Dept. of Computer Science, ETH Z\u00fcrich; CSAIL, MIT; Dept. of Computer Science, ETH Z\u00fcrich", "aff_domain": "inf.ethz.ch;csail.mit.edu;ethz.ch", "email": "inf.ethz.ch;csail.mit.edu;ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0c0a7566915f4f24853fc4192689aa7e-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "ETH Zurich;Massachusetts Institute of Technology", "aff_unique_dep": "Dept. of Computer Science;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.ethz.ch;https://www.csail.mit.edu", "aff_unique_abbr": "ETHZ;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Provably Correct Automatic Sub-Differentiation for Qualified Programs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11686", "id": "11686", "author_site": "Sham Kakade, Jason Lee", "author": "Sham M. 
Kakade; Jason Lee", "abstract": "The \\emph{Cheap Gradient Principle}~\\citep{Griewank:2008:EDP:1455489} --- the computational cost of computing a $d$-dimensional vector of partial derivatives of a scalar function is nearly the same (often within a factor of $5$) as that of simply computing the scalar function itself --- is of central importance in optimization; it allows us to quickly obtain (high-dimensional) gradients of scalar loss functions which are subsequently used in black box gradient-based optimization procedures. The current state of affairs is markedly different with regards to computing sub-derivatives: widely used ML libraries, including TensorFlow and PyTorch, do \\emph{not} correctly compute (generalized) sub-derivatives even on simple differentiable examples. This work considers the question: is there a \\emph{Cheap Sub-gradient Principle}? Our main result shows that, under certain restrictions on our library of non-smooth functions (standard in non-linear programming), provably correct generalized sub-derivatives can be computed at a computational cost that is within a (dimension-free) factor of $6$ of the cost of computing the scalar function itself.", "bibtex": "@inproceedings{NEURIPS2018_142c65e0,\n author = {Kakade, Sham M and Lee, Jason D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Provably Correct Automatic Sub-Differentiation for Qualified Programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/142c65e00f4f7cf2e6c4c996e34005df-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/142c65e00f4f7cf2e6c4c996e34005df-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/142c65e00f4f7cf2e6c4c996e34005df-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/142c65e00f4f7cf2e6c4c996e34005df-Reviews.html", "metareview": "", "pdf_size": 288893, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11413892697781629989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "University of Washington; University of Southern California", "aff_domain": "cs.washington.edu;marshall.usc.edu", "email": "cs.washington.edu;marshall.usc.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/142c65e00f4f7cf2e6c4c996e34005df-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Washington;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://www.usc.edu", "aff_unique_abbr": "UW;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Proximal Graphical Event Models", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11779", "id": "11779", "author_site": "Debarun Bhattacharjya, Dharmashankar Subramanian, Tian Gao", "author": "Debarun Bhattacharjya; Dharmashankar Subramanian; Tian Gao", "abstract": "Event datasets include events that occur irregularly over the timeline and are prevalent in numerous domains. We introduce proximal graphical event models (PGEM) as a representation of such datasets. 
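As the next sentence makes precise, the rate of an event type in a PGEM depends only on whether its parents occurred within a recent window of history. A minimal illustrative sketch of that proximal dependence (rates, names, and windows here are hypothetical, not taken from the paper):

```python
def pgem_rate(t, parent_times, window, rate_if_recent, rate_otherwise):
    """Conditional intensity of an event type under the proximal assumption:
    the rate depends only on whether any parent event occurred in the most
    recent window [t - window, t)."""
    recent = any(t - window <= u < t for u in parent_times)
    return rate_if_recent if recent else rate_otherwise

# A parent event at t = 9.2 falls inside the window [8.5, 10), so the
# elevated rate applies:
print(pgem_rate(10.0, [3.1, 9.2], window=1.5,
                rate_if_recent=2.0, rate_otherwise=0.2))  # -> 2.0
```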
PGEMs belong to a broader family of models that characterize relationships between various types of events, where the rate of occurrence of an event type depends only on whether or not its parents have occurred in the most recent history. The main advantage over state-of-the-art models is that PGEMs are entirely data-driven and do not require additional inputs from the user, which can require domain knowledge such as the choice of basis functions or hyperparameters in graphical event models. We theoretically justify our learning of optimal windows for parental history and the choices of parental sets, and show that the algorithm is sound and complete in terms of parent structure learning. We present additional efficient heuristics for learning PGEMs from data, demonstrating their effectiveness on synthetic and real datasets.", "bibtex": "@inproceedings{NEURIPS2018_f1ababf1,\n author = {Bhattacharjya, Debarun and Subramanian, Dharmashankar and Gao, Tian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Proximal Graphical Event Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f1ababf130ee6a25f12da7478af8f1ac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f1ababf130ee6a25f12da7478af8f1ac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f1ababf130ee6a25f12da7478af8f1ac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f1ababf130ee6a25f12da7478af8f1ac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f1ababf130ee6a25f12da7478af8f1ac-Reviews.html", "metareview": "", "pdf_size": 572747, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8864283488764304198&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "IBM Research; IBM Research; IBM Research", "aff_domain": "us.ibm.com;us.ibm.com;us.ibm.com", "email": "us.ibm.com;us.ibm.com;us.ibm.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f1ababf130ee6a25f12da7478af8f1ac-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Proximal SCOPE for Distributed Sparse Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11633", "id": "11633", "author_site": "Shenyi Zhao, Gong-Duo Zhang, Ming-Wei Li, Wu-Jun Li", "author": "Shenyi Zhao; Gong-Duo Zhang; Ming-Wei Li; Wu-Jun Li", "abstract": "Distributed sparse learning with a cluster of multiple machines has attracted much attention in machine learning, especially for large-scale applications with high-dimensional data. One popular way to implement sparse learning is to use L1 regularization. In this paper, we propose a novel method, called proximal SCOPE (pSCOPE), for distributed sparse learning with L1 regularization. pSCOPE is based on a cooperative autonomous local learning (CALL) framework. 
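For reference, the L1 proximal (soft-thresholding) step that any proximal method for L1-regularized learning builds on can be written as a minimal sketch (this is the standard closed-form operator, not the pSCOPE implementation; the function name is illustrative):

```python
import numpy as np

def prox_l1(v, lam):
    """Proximal operator of lam * ||x||_1, i.e. the closed-form solution of
    argmin_x 0.5 * ||x - v||^2 + lam * ||x||_1 (elementwise soft-thresholding)."""
    return np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)

# Coordinates with magnitude below lam are zeroed out, inducing sparsity:
print(prox_l1(np.array([0.3, -1.2, 0.05]), 0.1))  # [ 0.2 -1.1  0. ]
```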
In the CALL framework of pSCOPE, we find that the data partition affects the convergence of the learning procedure, and subsequently we define a metric to measure the goodness of a data partition. Based on the defined metric, we theoretically prove that pSCOPE is convergent with a linear convergence rate if the data partition is good enough. We also prove that a better data partition implies a faster convergence rate. Furthermore, pSCOPE is communication efficient. Experimental results on real data sets show that pSCOPE can outperform other state-of-the-art distributed methods for sparse learning.", "bibtex": "@inproceedings{NEURIPS2018_f4334c13,\n author = {Zhao, Shenyi and Zhang, Gong-Duo and Li, Ming-Wei and Li, Wu-Jun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Proximal SCOPE for Distributed Sparse Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f4334c131c781e2a6f0a5e34814c8147-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f4334c131c781e2a6f0a5e34814c8147-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f4334c131c781e2a6f0a5e34814c8147-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f4334c131c781e2a6f0a5e34814c8147-Reviews.html", "metareview": "", "pdf_size": 902269, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10254708286445910327&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "National Key Lab. for Novel Software Tech. + Dept. of Comp. Sci. and Tech. + Nanjing University, Nanjing 210023, China; National Key Lab. for Novel Software Tech. + Dept. of Comp. Sci. and Tech. + Nanjing University, Nanjing 210023, China; National Key Lab. for Novel Software Tech. + Dept. of Comp. Sci. and Tech. + Nanjing University, Nanjing 210023, China; National Key Lab. for Novel Software Tech. + Dept. of Comp. Sci. and Tech. + Nanjing University, Nanjing 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;nju.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f4334c131c781e2a6f0a5e34814c8147-Abstract.html", "aff_unique_index": "0+1+2;0+1+2;0+1+2;0+1+2", "aff_unique_norm": "National Key Laboratory for Novel Software Technology;University Affiliation;Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology;Department of Computer Science and Technology;", "aff_unique_url": ";;http://www.nju.edu.cn", "aff_unique_abbr": ";;Nanjing U", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Nanjing", "aff_country_unique_index": "0+0;0+0;0+0;0+0", "aff_country_unique": "China;" }, { "title": "Q-learning with Nearest Neighbors", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11316", "id": "11316", "author_site": "Devavrat Shah, Qiaomin Xie", "author": "Devavrat Shah; Qiaomin Xie", "abstract": "We consider model-free reinforcement learning for infinite-horizon discounted Markov Decision Processes (MDPs) with a continuous state space and unknown transition kernel, when only a single sample path under an arbitrary policy of the system is available. 
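A single-neighbor caricature of the nearest-neighbor Q-learning scheme described next, with a fixed set of anchor states standing in for the covering set (the anchors, step size, and function name are assumptions for illustration, not the paper's algorithm):

```python
import numpy as np

def nn_q_update(anchors, Q, s, a, r, s_next, gamma=0.9, lr=0.1):
    """One Q-learning step with nearest-neighbor state discretization:
    snap the continuous states s and s_next onto their nearest anchor
    states and apply the usual tabular Bellman update there.
    anchors: (m, d) array of fixed states; Q: (m, n_actions) value table."""
    i = int(np.argmin(np.linalg.norm(anchors - s, axis=1)))       # index for s
    j = int(np.argmin(np.linalg.norm(anchors - s_next, axis=1)))  # index for s'
    Q[i, a] += lr * (r + gamma * Q[j].max() - Q[i, a])
    return Q

# e.g. with 100 anchor states covering [0, 1] and two actions:
anchors = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
Q = np.zeros((100, 2))
Q = nn_q_update(anchors, Q, s=np.array([0.42]), a=1, r=1.0, s_next=np.array([0.47]))
```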
We consider the Nearest Neighbor Q-Learning (NNQL) algorithm to learn the optimal Q function using a nearest neighbor regression method. As the main contribution, we provide a tight finite sample analysis of the convergence rate. In particular, for MDPs with a $d$-dimensional state space and discount factor $\gamma \in (0,1)$, given an arbitrary sample path with ``covering time'' $L$, we establish that the algorithm is guaranteed to output an $\varepsilon$-accurate estimate of the optimal Q-function using $\tilde{O}(L/(\varepsilon^3(1-\gamma)^7))$ samples. For instance, for a well-behaved MDP, the covering time of the sample path under the purely random policy scales as $\tilde{O}(1/\varepsilon^d),$ so the sample complexity scales as $\tilde{O}(1/\varepsilon^{d+3}).$ Indeed, we establish a lower bound showing that a dependence of $\tilde{\Omega}(1/\varepsilon^{d+2})$ is necessary.", "bibtex": "@inproceedings{NEURIPS2018_309fee4e,\n author = {Shah, Devavrat and Xie, Qiaomin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Q-learning with Nearest Neighbors},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/309fee4e541e51de2e41f21bebb342aa-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/309fee4e541e51de2e41f21bebb342aa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/309fee4e541e51de2e41f21bebb342aa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/309fee4e541e51de2e41f21bebb342aa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/309fee4e541e51de2e41f21bebb342aa-Reviews.html", "metareview": "", "pdf_size": 526474, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15483823731905462010&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Massachusetts Institute of Technology+Laboratory for Information and Decision Systems (LIDS)+Department of EECS+Statistics and Data Science Center at MIT; Massachusetts Institute of Technology+Laboratory for Information and Decision Systems (LIDS)", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/309fee4e541e51de2e41f21bebb342aa-Abstract.html", "aff_unique_index": "0+0+1+0;0+0", "aff_unique_norm": "Massachusetts Institute of Technology;University of California, Berkeley", "aff_unique_dep": ";Department of Electrical Engineering and Computer Sciences", "aff_unique_url": "https://web.mit.edu;https://www.berkeley.edu", "aff_unique_abbr": "MIT;UC Berkeley", "aff_campus_unique_index": "1+2+1;1", "aff_campus_unique": ";Cambridge;Berkeley", "aff_country_unique_index": "0+0+0+0;0+0", "aff_country_unique": "United States" }, { "title": "Quadratic Decomposable Submodular Function Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11125", "id": "11125", "author_site": "Pan Li, Niao He, Olgica Milenkovic", "author": "Pan Li; Niao He; Olgica Milenkovic", "abstract": "We introduce a new convex optimization problem, termed quadratic decomposable submodular function minimization. 
The problem is closely related to decomposable submodular function minimization and arises in many settings for learning on graphs and hypergraphs, such as graph-based semi-supervised learning and PageRank. We approach the problem via a new dual strategy and describe an objective that may be optimized via random coordinate descent (RCD) methods and projections onto cones. We also establish the linear convergence rate of the RCD algorithm and develop efficient projection algorithms with provable performance guarantees. Numerical experiments in semi-supervised learning on hypergraphs confirm the efficiency of the proposed algorithm and demonstrate the significant improvements in prediction accuracy with respect to state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_58ae749f,\n author = {Li, Pan and He, Niao and Milenkovic, Olgica},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Quadratic Decomposable Submodular Function Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/58ae749f25eded36f486bc85feb3f0ab-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/58ae749f25eded36f486bc85feb3f0ab-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/58ae749f25eded36f486bc85feb3f0ab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/58ae749f25eded36f486bc85feb3f0ab-Reviews.html", "metareview": "", "pdf_size": 459357, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9668278212333240026&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "UIUC; UIUC; UIUC", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu", "github": "https://github.com/lipan00123/QDSDM", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/58ae749f25eded36f486bc85feb3f0ab-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://www.illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Quadrature-based features for kernel approximation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11872", "id": "11872", "author_site": "Marina Munkhoeva, Yermek Kapushev, Evgeny Burnaev, Ivan Oseledets", "author": "Marina Munkhoeva; Yermek Kapushev; Evgeny Burnaev; Ivan Oseledets", "abstract": "We consider the problem of improving kernel approximation via randomized feature maps. These maps arise as Monte Carlo approximations to integral representations of kernel functions and scale up kernel methods for larger datasets. Based on an efficient numerical integration technique, we propose a unifying approach that reinterprets the previous random features methods and extends to better estimates of the kernel approximation. 
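The plain Monte Carlo baseline referred to here, random Fourier features for the RBF kernel, can be sketched as follows (a hedged sketch only; the paper's quadrature rules replace the random frequencies below with quadrature nodes, and the function name is illustrative):

```python
import numpy as np

def random_fourier_features(X, n_features, gamma, seed=0):
    """Monte Carlo feature map z with E[z(x) @ z(y)] ~= exp(-gamma*||x - y||^2):
    by Bochner's theorem, the RBF kernel is an expectation of cosine features
    with frequencies drawn from its spectral density N(0, 2*gamma*I)."""
    rng = np.random.default_rng(seed)
    d = X.shape[1]
    W = rng.normal(0.0, np.sqrt(2.0 * gamma), size=(d, n_features))
    b = rng.uniform(0.0, 2.0 * np.pi, size=n_features)
    return np.sqrt(2.0 / n_features) * np.cos(X @ W + b)

# Sanity check: feature inner products approximate the exact kernel matrix.
X = np.random.default_rng(1).normal(size=(5, 3))
Z = random_fourier_features(X, n_features=2000, gamma=0.5)
K_exact = np.exp(-0.5 * np.sum((X[:, None] - X[None]) ** 2, axis=-1))
print(np.max(np.abs(Z @ Z.T - K_exact)))  # small approximation error
```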
We derive the convergence behavior and conduct an extensive empirical study that supports our hypothesis.", "bibtex": "@inproceedings{NEURIPS2018_6e923226,\n author = {Munkhoeva, Marina and Kapushev, Yermek and Burnaev, Evgeny and Oseledets, Ivan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Quadrature-based features for kernel approximation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6e923226e43cd6fac7cfe1e13ad000ac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6e923226e43cd6fac7cfe1e13ad000ac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6e923226e43cd6fac7cfe1e13ad000ac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6e923226e43cd6fac7cfe1e13ad000ac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6e923226e43cd6fac7cfe1e13ad000ac-Reviews.html", "metareview": "", "pdf_size": 889590, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5888962935039944528&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Skolkovo Institute of Science and Technology; Skolkovo Institute of Science and Technology; Skolkovo Institute of Science and Technology + Institute of Numerical Mathematics of the Russian Academy of Sciences; Skolkovo Institute of Science and Technology + Institute of Numerical Mathematics of the Russian Academy of Sciences", "aff_domain": ";;;", "email": ";;;", "github": "https://github.com/maremun/quffka", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6e923226e43cd6fac7cfe1e13ad000ac-Abstract.html", "aff_unique_index": "0;0;0+1;0+1", "aff_unique_norm": "Skolkovo Institute of Science and Technology;Russian Academy of Sciences", "aff_unique_dep": ";Institute of Numerical Mathematics", "aff_unique_url": "https://www.skoltech.ru;https://www.ras.ru", "aff_unique_abbr": "Skoltech;RAS", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0;0+0", "aff_country_unique": "Russian Federation" }, { "title": "Quantifying Learning Guarantees for Convex but Inconsistent Surrogates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11089", "id": "11089", "author_site": "Kirill Struminsky, Simon Lacoste-Julien, Anton Osokin", "author": "Kirill Struminsky; Simon Lacoste-Julien; Anton Osokin", "abstract": "We study consistency properties of machine learning methods based on minimizing convex surrogates. We extend the recent framework of Osokin et al. (2017) for the quantitative analysis of consistency properties to the case of inconsistent surrogates. Our key technical contribution consists in a new lower bound on the calibration function for the quadratic surrogate, which is non-trivial (not always zero) for inconsistent cases. The new bound allows us to quantify the level of inconsistency of the setting and shows how learning with inconsistent surrogates can have guarantees on sample complexity and optimization difficulty. We apply our theory to two concrete cases: multi-class classification with the tree-structured loss and ranking with the mean average precision loss. 
The results show the approximation-computation trade-offs caused by inconsistent surrogates and their potential benefits.", "bibtex": "@inproceedings{NEURIPS2018_39461a19,\n author = {Struminsky, Kirill and Lacoste-Julien, Simon and Osokin, Anton},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Quantifying Learning Guarantees for Convex but Inconsistent Surrogates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/39461a19e9eddfb385ea76b26521ea48-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/39461a19e9eddfb385ea76b26521ea48-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/39461a19e9eddfb385ea76b26521ea48-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/39461a19e9eddfb385ea76b26521ea48-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/39461a19e9eddfb385ea76b26521ea48-Reviews.html", "metareview": "", "pdf_size": 520271, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5863180738288366002&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/39461a19e9eddfb385ea76b26521ea48-Abstract.html" }, { "title": "Query Complexity of Bayesian Private Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11252", "id": "11252", "author": "Kuang Xu", "abstract": "We study the query complexity of Bayesian Private Learning: a learner wishes to locate a random target within an interval by submitting queries, in the presence of an adversary who observes all of her queries but not the responses. How many queries are necessary and sufficient in order for the learner to accurately estimate the target, while simultaneously concealing the target from the adversary? \n\nOur main result is a query complexity lower bound that is tight up to the first order. We show that if the learner wants to estimate the target within an error of $\\epsilon$, while ensuring that no adversary estimator can achieve a constant additive error with probability greater than $1/L$, then the query complexity is on the order of $L\\log(1/\\epsilon)$ as $\\epsilon \\to 0$. Our result demonstrates that increased privacy, as captured by $L$, comes at the expense of a \\emph{multiplicative} increase in query complexity. The proof builds on Fano's inequality and properties of certain proportional-sampling estimators.", "bibtex": "@inproceedings{NEURIPS2018_7bccfde7,\n author = {Xu, Kuang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Query Complexity of Bayesian Private Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7bccfde7714a1ebadf06c5f4cea752c1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7bccfde7714a1ebadf06c5f4cea752c1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7bccfde7714a1ebadf06c5f4cea752c1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7bccfde7714a1ebadf06c5f4cea752c1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7bccfde7714a1ebadf06c5f4cea752c1-Reviews.html", "metareview": "", "pdf_size": 465237, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6255504793196276665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Stanford Graduate School of Business", "aff_domain": "stanford.edu", "email": "stanford.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7bccfde7714a1ebadf06c5f4cea752c1-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Graduate School of Business", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford GSB", "aff_campus_unique_index": "0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Query K-means Clustering and the Double Dixie Cup Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11642", "id": "11642", "author_site": "Eli Chien, Chao Pan, Olgica Milenkovic", "author": "I Chien; Chao Pan; Olgica Milenkovic", "abstract": "We consider the problem of approximate $K$-means clustering with outliers and side information provided by same-cluster queries and possibly noisy answers. Our solution shows that, under some mild assumptions on the smallest cluster size, one can obtain a $(1+\epsilon)$-approximation for the optimal potential with probability at least $1-\delta$, where $\epsilon>0$ and $\delta\in(0,1)$, using an expected number of $O(\frac{K^3}{\epsilon \delta})$ noiseless same-cluster queries and comparison-based clustering of complexity $O(ndK + \frac{K^3}{\epsilon \delta})$; here, $n$ denotes the number of points and $d$ the dimension of space. Compared to a handful of other known approaches that perform importance sampling to account for small cluster sizes, the proposed query technique reduces the number of queries by a factor of roughly $O(\frac{K^6}{\epsilon^3})$, at the cost of possibly missing very small clusters. We extend this setting to the case where some queries to the oracle produce erroneous information, and where certain points, termed outliers, do not belong to any cluster. Our proof techniques differ from previous methods used for $K$-means clustering analysis, as they rely on estimating the sizes of the clusters and the number of points needed for accurate centroid estimation and subsequent nontrivial generalizations of the double Dixie cup problem. We illustrate the performance of the proposed algorithm on both synthetic and real datasets, including MNIST and CIFAR $10$.", "bibtex": "@inproceedings{NEURIPS2018_0655f117,\n author = {Chien, I and Pan, Chao and Milenkovic, Olgica},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Query K-means Clustering and the Double Dixie Cup Problem},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0655f117444fc1911ab9c6f6b0139051-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0655f117444fc1911ab9c6f6b0139051-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0655f117444fc1911ab9c6f6b0139051-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0655f117444fc1911ab9c6f6b0139051-Reviews.html", "metareview": "", "pdf_size": 588587, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8633754529655920818&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department ECE, UIUC; Department ECE, UIUC; Department ECE, UIUC", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0655f117444fc1911ab9c6f6b0139051-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.uiuc.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "REFUEL: Exploring Sparse Features in Deep Reinforcement Learning for Fast Disease Diagnosis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11705", "id": "11705", "author_site": "Yu-Shao Peng, Kai-Fu Tang, Hsuan-Tien Lin, Edward Chang", "author": "Yu-Shao Peng; Kai-Fu Tang; Hsuan-Tien Lin; Edward Chang", "abstract": "This paper proposes REFUEL, a reinforcement learning method with two techniques: {\em reward shaping} and {\em feature rebuilding}, to improve the performance of online symptom checking for disease diagnosis. Reward shaping can guide the policy search towards better directions. Feature rebuilding can guide the agent to learn correlations between features. Together, they can find symptom queries that can yield positive responses from a patient with high probability. Experimental results show that the two techniques in REFUEL allow the symptom checker to identify the disease more rapidly and accurately.", "bibtex": "@inproceedings{NEURIPS2018_b5a1d925,\n author = {Peng, Yu-Shao and Tang, Kai-Fu and Lin, Hsuan-Tien and Chang, Edward},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {REFUEL: Exploring Sparse Features in Deep Reinforcement Learning for Fast Disease Diagnosis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b5a1d925221b37e2e399f7b319038ba0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b5a1d925221b37e2e399f7b319038ba0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b5a1d925221b37e2e399f7b319038ba0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b5a1d925221b37e2e399f7b319038ba0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b5a1d925221b37e2e399f7b319038ba0-Reviews.html", "metareview": "", "pdf_size": 1437665, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4728705040395526728&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "aff": "HTC Research & Healthcare; HTC Research & Healthcare; Department of CSIE, National Taiwan University; HTC Research & Healthcare", "aff_domain": "htc.com;htc.com;csie.ntu.edu.tw;htc.com", "email": "htc.com;htc.com;csie.ntu.edu.tw;htc.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b5a1d925221b37e2e399f7b319038ba0-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "HTC Corporation;National Taiwan University", "aff_unique_dep": "Research & Healthcare;Department of CSIE", "aff_unique_url": "https://www.htc.com;https://www.ntu.edu.tw", "aff_unique_abbr": "HTC;NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Random Feature Stein Discrepancies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11202", "id": "11202", "author_site": "Jonathan Huggins, Lester Mackey", "author": "Jonathan Huggins; Lester Mackey", "abstract": "Computable Stein discrepancies have been deployed for a variety of applications, ranging from sampler selection in posterior inference to approximate Bayesian inference to goodness-of-fit testing. Existing convergence-determining Stein discrepancies admit strong theoretical guarantees but suffer from a computational cost that grows quadratically in the sample size. While linear-time Stein discrepancies have been proposed for goodness-of-fit testing, they exhibit avoidable degradations in testing power\u2014even when power is explicitly optimized. To address these shortcomings, we introduce feature Stein discrepancies (\u03a6SDs), a new family of quality measures that can be cheaply approximated using importance sampling. We show how to construct \u03a6SDs that provably determine the convergence of a sample to its target and develop high-accuracy approximations\u2014random \u03a6SDs (R\u03a6SDs)\u2014which are computable in near-linear time. In our experiments with sampler selection for approximate posterior inference and goodness-of-fit testing, R\u03a6SDs perform as well or better than quadratic-time KSDs while being orders of magnitude faster to compute.", "bibtex": "@inproceedings{NEURIPS2018_0f840be9,\n author = {Huggins, Jonathan and Mackey, Lester},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Random Feature Stein Discrepancies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0f840be9b8db4d3fbd5ba2ce59211f55-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0f840be9b8db4d3fbd5ba2ce59211f55-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0f840be9b8db4d3fbd5ba2ce59211f55-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0f840be9b8db4d3fbd5ba2ce59211f55-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0f840be9b8db4d3fbd5ba2ce59211f55-Reviews.html", "metareview": "", "pdf_size": 1789067, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11066687693763291331&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Biostatistics, Harvard; Microsoft Research New England", "aff_domain": "mit.edu;microsoft.com", "email": "mit.edu;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0f840be9b8db4d3fbd5ba2ce59211f55-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Harvard University;Microsoft", "aff_unique_dep": "Department of Biostatistics;Microsoft Research", "aff_unique_url": "https://www.harvard.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "Harvard;MSR NE", "aff_campus_unique_index": "1", "aff_campus_unique": ";New England", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Randomized Prior Functions for Deep Reinforcement Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11823", "id": "11823", "author_site": "Ian Osband, John Aslanides, Albin Cassirer", "author": "Ian Osband; John Aslanides; Albin Cassirer", "abstract": "Dealing with uncertainty is essential for efficient reinforcement learning.\nThere is a growing literature on uncertainty estimation for deep learning from fixed datasets, but many of the most popular approaches are poorly-suited to sequential decision problems.\nOther methods, such as bootstrap sampling, have no mechanism for uncertainty that does not come from the observed data.\nWe highlight why this can be a crucial shortcoming and propose a simple remedy through addition of a randomized untrainable `prior' network to each ensemble member.\nWe prove that this approach is efficient with linear representations, provide simple illustrations of its efficacy with nonlinear representations and show that this approach scales to large-scale problems far better than previous attempts.", "bibtex": "@inproceedings{NEURIPS2018_5a7b238b,\n author = {Osband, Ian and Aslanides, John and Cassirer, Albin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Randomized Prior Functions for Deep Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5a7b238ba0f6502e5d6be14424b20ded-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5a7b238ba0f6502e5d6be14424b20ded-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5a7b238ba0f6502e5d6be14424b20ded-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5a7b238ba0f6502e5d6be14424b20ded-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5a7b238ba0f6502e5d6be14424b20ded-Reviews.html", "metareview": "", "pdf_size": 1562878, "gs_citation": 499, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14451589847980470978&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5a7b238ba0f6502e5d6be14424b20ded-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Re-evaluating evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11330", "id": "11330", "author_site": "David Balduzzi, Karl Tuyls, Julien Perolat, Thore Graepel", "author": "David Balduzzi; Karl Tuyls; Julien Perolat; Thore Graepel", "abstract": "Progress in machine learning is measured by careful evaluation on problems of outstanding common interest. However, the proliferation of benchmark suites and environments, adversarial attacks, and other complications has diluted the basic evaluation model by overwhelming researchers with choices. Deliberate or accidental cherry picking is increasingly likely, and designing well-balanced evaluation suites requires increasing effort. In this paper we take a step back and propose Nash averaging. The approach builds on a detailed analysis of the algebraic structure of evaluation in two basic scenarios: agent-vs-agent and agent-vs-task. The key strength of Nash averaging is that it automatically adapts to redundancies in evaluation data, so that results are not biased by the incorporation of easy tasks or weak agents. Nash averaging thus encourages maximally inclusive evaluation -- since there is no harm (computational cost aside) from including all available tasks and agents.", "bibtex": "@inproceedings{NEURIPS2018_cdf1035c,\n author = {Balduzzi, David and Tuyls, Karl and Perolat, Julien and Graepel, Thore},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Re-evaluating evaluation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cdf1035c34ec380218a8cc9a43d438f9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cdf1035c34ec380218a8cc9a43d438f9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cdf1035c34ec380218a8cc9a43d438f9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cdf1035c34ec380218a8cc9a43d438f9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cdf1035c34ec380218a8cc9a43d438f9-Reviews.html", "metareview": "", "pdf_size": 540334, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12761570685717938244&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "DeepMind; DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cdf1035c34ec380218a8cc9a43d438f9-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Realistic Evaluation of Deep Semi-Supervised Learning Algorithms", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11327", "id": "11327", "author_site": "Avital Oliver, Augustus Odena, Colin A Raffel, Ekin Dogus Cubuk, Ian Goodfellow", "author": "Avital Oliver; Augustus Odena; Colin A Raffel; Ekin Dogus Cubuk; Ian Goodfellow", "abstract": "Semi-supervised learning (SSL) provides a powerful framework for leveraging unlabeled data when labels are limited or expensive to obtain. SSL algorithms based on deep neural networks have recently proven successful on standard benchmark tasks. However, we argue that these benchmarks fail to address many issues that SSL algorithms would face in real-world applications. After creating a unified reimplementation of various widely-used SSL techniques, we test them in a suite of experiments designed to address these issues. We find that the performance of simple baselines which do not use unlabeled data is often underreported, SSL methods differ in sensitivity to the amount of labeled and unlabeled data, and performance can degrade substantially when the unlabeled dataset contains out-of-distribution examples. To help guide SSL research towards real-world applicability, we make our unified reimplementation and evaluation platform publicly available.", "bibtex": "@inproceedings{NEURIPS2018_c1fea270,\n author = {Oliver, Avital and Odena, Augustus and Raffel, Colin A and Cubuk, Ekin Dogus and Goodfellow, Ian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Realistic Evaluation of Deep Semi-Supervised Learning Algorithms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c1fea270c48e8079d8ddf7d06d26ab52-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c1fea270c48e8079d8ddf7d06d26ab52-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c1fea270c48e8079d8ddf7d06d26ab52-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c1fea270c48e8079d8ddf7d06d26ab52-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c1fea270c48e8079d8ddf7d06d26ab52-Reviews.html", "metareview": "", "pdf_size": 806074, "gs_citation": 1259, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15456844754123849487&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Google Brain; Google Brain; Google Brain; Google Brain; Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "https://github.com/brain-research/realistic-ssl-evaluation", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c1fea270c48e8079d8ddf7d06d26ab52-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Rectangular Bounding Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11732", "id": "11732", "author_site": "Xuhui Fan, Bin Li, Scott SIsson", "author": "Xuhui Fan; Bin Li; Scott SIsson", "abstract": "Stochastic partition models divide a multi-dimensional space into a number of rectangular regions, such that the data within each region exhibit certain types of homogeneity. Due to the nature of their partition strategy, existing partition models may create many unnecessary divisions in sparse regions when trying to describe data in dense regions. To avoid this problem we introduce a new parsimonious partition model -- the Rectangular Bounding Process (RBP) -- to efficiently partition multi-dimensional spaces, by employing a bounding strategy to enclose data points within rectangular bounding boxes. Unlike existing approaches, the RBP possesses several attractive theoretical properties that make it a powerful nonparametric partition prior on a hypercube. In particular, the RBP is self-consistent and as such can be directly extended from a finite hypercube to infinite (unbounded) space. We apply the RBP to regression trees and relational models as a flexible partition prior. The experimental results validate the merit of the RBP in rich yet parsimonious expressiveness compared to the state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_8ce87bdd,\n author = {Fan, Xuhui and Li, Bin and SIsson, Scott},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Rectangular Bounding Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8ce87bdda85cd44f14de9afb86491884-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8ce87bdda85cd44f14de9afb86491884-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8ce87bdda85cd44f14de9afb86491884-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8ce87bdda85cd44f14de9afb86491884-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8ce87bdda85cd44f14de9afb86491884-Reviews.html", "metareview": "", "pdf_size": 1274889, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10618275895500216203&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "School of Mathematics & Statistics, University of New South Wales; School of Computer Science, Fudan University; School of Mathematics & Statistics, University of New South Wales", "aff_domain": "unsw.edu.au;fudan.edu.cn;unsw.edu.au", "email": "unsw.edu.au;fudan.edu.cn;unsw.edu.au", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8ce87bdda85cd44f14de9afb86491884-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of New South Wales;Fudan University", "aff_unique_dep": "School of Mathematics & Statistics;School of Computer Science", "aff_unique_url": "https://www.unsw.edu.au;https://www.fudan.edu.cn", "aff_unique_abbr": "UNSW;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;China" }, { "title": "Recurrent Relational Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11339", "id": "11339", "author_site": "Rasmus Berg Palm, Ulrich Paquet, Ole Winther", "author": "Rasmus Palm; Ulrich Paquet; Ole Winther", "abstract": "This paper is concerned with learning to solve tasks that require a chain of interdependent steps of relational inference, like answering complex questions about the relationships between objects, or solving puzzles where the smaller elements of a solution mutually constrain each other. We introduce the recurrent relational network, a general purpose module that operates on a graph representation of objects. As a generalization of Santoro et al. [2017]\u2019s relational network, it can augment any neural network model with the capacity to do many-step relational reasoning. We achieve state of the art results on the bAbI textual question-answering dataset with the recurrent relational network, consistently solving 20/20 tasks. As bAbI is not particularly challenging from a relational reasoning point of view, we introduce Pretty-CLEVR, a new diagnostic dataset for relational reasoning. In the Pretty-CLEVR set-up, we can vary the question to control for the number of relational reasoning steps that are required to obtain the answer. Using Pretty-CLEVR, we probe the limitations of multi-layer perceptrons, relational and recurrent relational networks. Finally, we show how recurrent relational networks can learn to solve Sudoku puzzles from supervised training data, a challenging task requiring upwards of 64 steps of relational reasoning. 
We achieve state-of-the-art results amongst comparable methods by solving 96.6% of the hardest Sudoku puzzles.", "bibtex": "@inproceedings{NEURIPS2018_b9f94c77,\n author = {Palm, Rasmus and Paquet, Ulrich and Winther, Ole},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Recurrent Relational Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b9f94c77652c9a76fc8a442748cd54bd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b9f94c77652c9a76fc8a442748cd54bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b9f94c77652c9a76fc8a442748cd54bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b9f94c77652c9a76fc8a442748cd54bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b9f94c77652c9a76fc8a442748cd54bd-Reviews.html", "metareview": "", "pdf_size": 314367, "gs_citation": 237, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13875098514508819290&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Technical University of Denmark+Tradeshift; DeepMind; Technical University of Denmark", "aff_domain": "dtu.dk;google.com;dtu.dk", "email": "dtu.dk;google.com;dtu.dk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b9f94c77652c9a76fc8a442748cd54bd-Abstract.html", "aff_unique_index": "0+1;2;0", "aff_unique_norm": "Technical University of Denmark;Tradeshift;DeepMind", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dtu.dk;https://tradeshift.com;https://deepmind.com", "aff_unique_abbr": "DTU;Tradeshift;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;1;0", "aff_country_unique": "Denmark;United Kingdom" }, { "title": "Recurrent Transformer Networks for Semantic Correspondence", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11594", "id": "11594", "author_site": "Seungryong Kim, Stephen Lin, Sangryul Jeon, Dongbo Min, Kwanghoon Sohn", "author": "Seungryong Kim; Stephen Lin; SANG RYUL JEON; Dongbo Min; Kwanghoon Sohn", "abstract": "We present recurrent transformer networks (RTNs) for obtaining dense correspondences between semantically similar images. Our networks accomplish this through an iterative process of estimating spatial transformations between the input images and using these transformations to generate aligned convolutional activations. By directly estimating the transformations between an image pair, rather than employing spatial transformer networks to independently normalize each individual image, we show that greater accuracy can be achieved. This process is conducted in a recursive manner to refine both the transformation estimates and the feature representations. In addition, a technique is presented for weakly-supervised training of RTNs that is based on a proposed classification loss. With RTNs, state-of-the-art performance is attained on several benchmarks for semantic correspondence.", "bibtex": "@inproceedings{NEURIPS2018_e4a93f03,\n author = {Kim, Seungryong and Lin, Stephen and JEON, SANG RYUL and Min, Dongbo and Sohn, Kwanghoon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Recurrent Transformer Networks for Semantic Correspondence},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e4a93f0332b2519177ed55741ea4e5e7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e4a93f0332b2519177ed55741ea4e5e7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e4a93f0332b2519177ed55741ea4e5e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e4a93f0332b2519177ed55741ea4e5e7-Reviews.html", "metareview": "", "pdf_size": 11873242, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9734562710899504835&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "Yonsei University; Microsoft Research; Yonsei University; Ewha Womans University; Yonsei University", "aff_domain": "yonsei.ac.kr;microsoft.com;yonsei.ac.kr;ewha.ac.kr;yonsei.ac.kr", "email": "yonsei.ac.kr;microsoft.com;yonsei.ac.kr;ewha.ac.kr;yonsei.ac.kr", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e4a93f0332b2519177ed55741ea4e5e7-Abstract.html", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Yonsei University;Microsoft;Ewha Womans University", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.microsoft.com/en-us/research;http://www.ewha.ac.kr", "aff_unique_abbr": "Yonsei;MSR;Ewha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "title": "Recurrent World Models Facilitate Policy Evolution", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11254", "id": "11254", "author_site": "David Ha, J\u00fcrgen Schmidhuber", "author": "David Ha; J\u00fcrgen Schmidhuber", "abstract": "A generative recurrent neural network is quickly trained in an unsupervised manner to model popular reinforcement learning environments through compressed spatio-temporal representations. The world model's extracted features are fed into compact and simple policies trained by evolution, achieving state of the art results in various environments. We also train our agent entirely inside of an environment generated by its own internal world model, and transfer this policy back into the actual environment. Interactive version of this paper is available at https://worldmodels.github.io", "bibtex": "@inproceedings{NEURIPS2018_2de5d166,\n author = {Ha, David and Schmidhuber, J\\\"{u}rgen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Recurrent World Models Facilitate Policy Evolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2de5d16682c3c35007e4e92982f1a2ba-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2de5d16682c3c35007e4e92982f1a2ba-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2de5d16682c3c35007e4e92982f1a2ba-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2de5d16682c3c35007e4e92982f1a2ba-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2de5d16682c3c35007e4e92982f1a2ba-Reviews.html", "metareview": "", "pdf_size": 1432305, "gs_citation": 1268, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15327043406005665564&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Google Brain, Tokyo, Japan; NNAISENSE + The Swiss AI Lab, IDSIA (USI & SUPSI)", "aff_domain": "google.com;idsia.ch", "email": "google.com;idsia.ch", "github": "", "project": "https://worldmodels.github.io", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2de5d16682c3c35007e4e92982f1a2ba-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "Google;NNAISENSE;IDSIA", "aff_unique_dep": "Google Brain;;Swiss AI Lab", "aff_unique_url": "https://brain.google.com;https://nnaisense.com;https://www.idsia.ch/", "aff_unique_abbr": "Google Brain;NNAISENSE;IDSIA", "aff_campus_unique_index": "0;", "aff_campus_unique": "Tokyo;", "aff_country_unique_index": "0;1+1", "aff_country_unique": "Japan;Switzerland" }, { "title": "Recurrently Controlled Recurrent Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11465", "id": "11465", "author_site": "Yi Tay, Anh Tuan Luu, Siu Cheung Hui", "author": "Yi Tay; Anh Tuan Luu; Siu Cheung Hui", "abstract": "Recurrent neural networks (RNNs) such as long short-term memory and gated recurrent units are pivotal building blocks across a broad spectrum of sequence modeling problems. This paper proposes a recurrently controlled recurrent network (RCRN) for expressive and powerful sequence encoding. More concretely, the key idea behind our approach is to learn the recurrent gating functions using recurrent networks. Our architecture is split into two components - a controller cell and a listener cell whereby the recurrent controller actively influences the compositionality of the listener cell. We conduct extensive experiments on a myriad of tasks in the NLP domain such as sentiment analysis (SST, IMDb, Amazon reviews, etc.), question classification (TREC), entailment classification (SNLI, SciTail), answer selection (WikiQA, TrecQA) and reading comprehension (NarrativeQA). Across all 26 datasets, our results demonstrate that RCRN consistently outperforms not only BiLSTMs but also stacked BiLSTMs, suggesting that our controller architecture might be a suitable replacement for the widely adopted stacked architecture. Additionally, RCRN achieves state-of-the-art results on several well-established datasets.", "bibtex": "@inproceedings{NEURIPS2018_16026d60,\n author = {Tay, Yi and Luu, Anh Tuan and Hui, Siu Cheung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Recurrently Controlled Recurrent Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/16026d60ff9b54410b3435b403afd226-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/16026d60ff9b54410b3435b403afd226-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/16026d60ff9b54410b3435b403afd226-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/16026d60ff9b54410b3435b403afd226-Reviews.html", "metareview": "", "pdf_size": 320963, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=119621077163762339&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Nanyang Technological University; Institute for Infocomm Research; Nanyang Technological University", "aff_domain": "e.ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu.sg", "email": "e.ntu.edu.sg;i2r.a-star.edu.sg;ntu.edu.sg", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/16026d60ff9b54410b3435b403afd226-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;Institute for Infocomm Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "NTU;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "title": "Reducing Network Agnostophobia", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11873", "id": "11873", "author_site": "Akshay Raj Dhamija, Manuel G\u00fcnther, Terrance Boult", "author": "Akshay Raj Dhamija; Manuel G\u00fcnther; Terrance Boult", "abstract": "Agnostophobia, the fear of the unknown, can be experienced by deep learning engineers while applying their networks to real-world applications. Unfortunately, network behavior is not well defined for inputs far from a network's training set. In an uncontrolled environment, networks face many instances that are not of interest to them and have to be rejected in order to avoid a false positive. This problem has previously been tackled by researchers by either a) thresholding softmax, which by construction cannot return "none of the known classes", or b) using an additional background or garbage class. In this paper, we show that both of these approaches help, but are generally insufficient when previously unseen classes are encountered. We also introduce a new evaluation metric that focuses on comparing the performance of multiple approaches in scenarios where such unseen classes or unknowns are encountered. Our major contributions are simple yet effective Entropic Open-Set and Objectosphere losses that train networks using negative samples from some classes. These novel losses are designed to maximize entropy for unknown inputs while increasing separation in deep feature space by modifying magnitudes of known and unknown samples. Experiments on networks trained to classify classes from MNIST and CIFAR-10 show that our novel loss functions are significantly better at dealing with unknown inputs from datasets such as Devanagari, NotMNIST, CIFAR-100 and SVHN.", "bibtex": "@inproceedings{NEURIPS2018_48db7158,\n author = {Dhamija, Akshay Raj and G\\\"{u}nther, Manuel and Boult, Terrance},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reducing Network Agnostophobia},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/48db71587df6c7c442e5b76cc723169a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/48db71587df6c7c442e5b76cc723169a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/48db71587df6c7c442e5b76cc723169a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/48db71587df6c7c442e5b76cc723169a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/48db71587df6c7c442e5b76cc723169a-Reviews.html", "metareview": "", "pdf_size": 7431227, "gs_citation": 442, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13549236386686072567&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/48db71587df6c7c442e5b76cc723169a-Abstract.html" }, { "title": "Regret Bounds for Online Portfolio Selection with a Cardinality Constraint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12001", "id": "12001", "author_site": "Shinji Ito, Daisuke Hatano, Hanna Sumita, Akihiro Yabe, Takuro Fukunaga, Naonori Kakimura, Ken-Ichi Kawarabayashi", "author": "Shinji Ito; Daisuke Hatano; Hanna Sumita; Akihiro Yabe; Takuro Fukunaga; Naonori Kakimura; Ken-Ichi Kawarabayashi", "abstract": "Online portfolio selection is a sequential decision-making problem in which a learner repetitively selects a portfolio over a set of assets, aiming to maximize long-term return. In this paper, we study the problem with the cardinality constraint that the number of assets in a portfolio is restricted to be at most k, and consider two scenarios: (i) in the full-feedback setting, the learner can observe price relatives (rates of return to cost) for all assets, and (ii) in the bandit-feedback setting, the learner can observe price relatives only for invested assets. We propose efficient algorithms for these scenarios that achieve sublinear regrets. We also provide regret (statistical) lower bounds for both scenarios which nearly match the upper bounds when k is a constant. In addition, we give a computational lower bound which implies that no algorithm can maintain both computational efficiency and a small regret upper bound.", "bibtex": "@inproceedings{NEURIPS2018_6c1e55ec,\n author = {Ito, Shinji and Hatano, Daisuke and Sumita, Hanna and Yabe, Akihiro and Fukunaga, Takuro and Kakimura, Naonori and Kawarabayashi, Ken-Ichi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regret Bounds for Online Portfolio Selection with a Cardinality Constraint},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6c1e55ec7c43dc51a37472ddcbd756fb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6c1e55ec7c43dc51a37472ddcbd756fb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6c1e55ec7c43dc51a37472ddcbd756fb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6c1e55ec7c43dc51a37472ddcbd756fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6c1e55ec7c43dc51a37472ddcbd756fb-Reviews.html", "metareview": "", "pdf_size": 525528, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1533379875320546040&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6c1e55ec7c43dc51a37472ddcbd756fb-Abstract.html" }, { "title": "Regret Bounds for Robust Adaptive Control of the Linear Quadratic Regulator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11415", "id": "11415", "author_site": "Sarah Dean, Horia Mania, Nikolai Matni, Benjamin Recht, Stephen Tu", "author": "Sarah Dean; Horia Mania; Nikolai Matni; Benjamin Recht; Stephen Tu", "abstract": "We consider adaptive control of the Linear Quadratic Regulator (LQR), where an unknown linear system is controlled subject to quadratic costs. Leveraging recent developments in the estimation of linear systems and in robust controller synthesis, we present the first provably polynomial time algorithm that achieves sub-linear regret on this problem. We further study the interplay between regret minimization and parameter estimation by proving a lower bound on the expected regret in terms of the exploration schedule used by any algorithm. Finally, we conduct a numerical study comparing our robust adaptive algorithm to other methods from the adaptive LQR literature, and demonstrate the flexibility of our proposed method by extending it to a demand forecasting problem subject to state constraints.", "bibtex": "@inproceedings{NEURIPS2018_0ae3f79a,\n author = {Dean, Sarah and Mania, Horia and Matni, Nikolai and Recht, Benjamin and Tu, Stephen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regret Bounds for Robust Adaptive Control of the Linear Quadratic Regulator},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0ae3f79a30234b6c45a6f7d298ba1310-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0ae3f79a30234b6c45a6f7d298ba1310-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/0ae3f79a30234b6c45a6f7d298ba1310-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0ae3f79a30234b6c45a6f7d298ba1310-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0ae3f79a30234b6c45a6f7d298ba1310-Reviews.html", "metareview": "", "pdf_size": 960203, "gs_citation": 332, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5260259850427352148&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0ae3f79a30234b6c45a6f7d298ba1310-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Regret bounds for meta Bayesian optimization with an unknown Gaussian process prior", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11991", "id": "11991", "author_site": "Zi Wang, Beomjoon Kim, Leslie Kaelbling", "author": "Zi Wang; Beomjoon Kim; Leslie Pack Kaelbling", "abstract": "Bayesian optimization usually assumes that a Bayesian prior is given. However, the strong theoretical guarantees in Bayesian optimization are often regrettably compromised in practice because of unknown parameters in the prior. In this paper, we adopt a variant of empirical Bayes and show that, by estimating the Gaussian process prior from offline data sampled from the same prior and constructing unbiased estimators of the posterior, variants of both GP-UCB and \\emph{probability of improvement} achieve a near-zero regret bound, which decreases to a constant proportional to the observational noise as the number of offline data and the number of online evaluations increase. Empirically, we have verified our approach on challenging simulated robotic problems featuring task and motion planning.", "bibtex": "@inproceedings{NEURIPS2018_41f860e3,\n author = {Wang, Zi and Kim, Beomjoon and Kaelbling, Leslie Pack},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regret bounds for meta Bayesian optimization with an unknown Gaussian process prior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/41f860e3b7f548abc1f8b812059137bf-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/41f860e3b7f548abc1f8b812059137bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/41f860e3b7f548abc1f8b812059137bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/41f860e3b7f548abc1f8b812059137bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/41f860e3b7f548abc1f8b812059137bf-Reviews.html", "metareview": "", "pdf_size": 781642, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17688880368262090655&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": "MIT CSAIL; MIT CSAIL; MIT CSAIL", "aff_domain": "csail.mit.edu;mit.edu;csail.mit.edu", "email": "csail.mit.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/41f860e3b7f548abc1f8b812059137bf-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT CSAIL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Regularization Learning Networks: Deep Learning for Tabular Datasets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11154", "id": "11154", "author_site": "Ira Shavitt, Eran Segal", "author": "Ira Shavitt; Eran Segal", "abstract": "Despite their impressive performance, Deep Neural Networks (DNNs) typically underperform Gradient Boosting Trees (GBTs) on many tabular-dataset learning tasks. We propose that applying a different regularization coefficient to each weight might boost the performance of DNNs by allowing them to make more use of the more relevant inputs. However, this will lead to an intractable number of hyperparameters. Here, we introduce Regularization Learning Networks (RLNs), which overcome this challenge by introducing an efficient hyperparameter tuning scheme which minimizes a new Counterfactual Loss. Our results show that RLNs significantly improve DNNs on tabular datasets, and achieve comparable results to GBTs, with the best performance achieved with an ensemble that combines GBTs and RLNs. RLNs produce extremely sparse networks, eliminating up to 99.8% of the network edges and 82% of the input features, thus providing more interpretable models and revealing the importance that the network assigns to different inputs. RLNs could efficiently learn a single network in datasets that comprise both tabular and unstructured data, such as in the setting of medical imaging accompanied by electronic health records. An open source implementation of RLN can be found at https://github.com/irashavitt/regularization_learning_networks", "bibtex": "@inproceedings{NEURIPS2018_500e75a0,\n author = {Shavitt, Ira and Segal, Eran},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regularization Learning Networks: Deep Learning for Tabular Datasets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/500e75a036dc2d7d2fec5da1b71d36cc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/500e75a036dc2d7d2fec5da1b71d36cc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/500e75a036dc2d7d2fec5da1b71d36cc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/500e75a036dc2d7d2fec5da1b71d36cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/500e75a036dc2d7d2fec5da1b71d36cc-Reviews.html", "metareview": "", "pdf_size": 661972, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12900371387873290272&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Weizmann Institute of Science; Weizmann Institute of Science", "aff_domain": "gmail.com;weizmann.ac.il", "email": "gmail.com;weizmann.ac.il", "github": "https://github.com/irashavitt/regularization_learning_networks", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/500e75a036dc2d7d2fec5da1b71d36cc-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Regularizing by the Variance of the Activations' Sample-Variances", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11223", "id": "11223", "author_site": "Etai Littwin, Lior Wolf", "author": "Etai Littwin; Lior Wolf", "abstract": "Normalization techniques play an important role in supporting efficient and often more effective training of deep neural networks. While conventional methods explicitly normalize the activations, we suggest adding a loss term instead. This new loss term encourages the variance of the activations to be stable and not vary from one random mini-batch to the next. As we prove, this encourages the activations to be distributed around a few distinct modes. We also show that if the inputs are from a mixture of two Gaussians, the new loss would either join the two together, or separate them optimally in the LDA sense, depending on the prior probabilities. Finally, we are able to link the new regularization term to the batchnorm method, which provides it with a regularization perspective. Our experiments demonstrate an improvement in accuracy over the batchnorm technique for both CNNs and fully connected networks.", "bibtex": "@inproceedings{NEURIPS2018_522a9ae9,\n author = {Littwin, Etai and Wolf, Lior},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regularizing by the Variance of the Activations\textquotesingle Sample-Variances},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/522a9ae9a99880d39e5daec35375e999-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/522a9ae9a99880d39e5daec35375e999-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/522a9ae9a99880d39e5daec35375e999-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/522a9ae9a99880d39e5daec35375e999-Reviews.html", "metareview": "", "pdf_size": 578885, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15140084165126889964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Tel Aviv University; Tel Aviv University + Facebook AI Research", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/522a9ae9a99880d39e5daec35375e999-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Tel Aviv University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.tau.ac.il;https://research.facebook.com", "aff_unique_abbr": "TAU;FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Israel;United States" }, { "title": "Reinforced Continual Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11111", "id": "11111", "author_site": "Ju Xu, Zhanxing Zhu", "author": "Ju Xu; Zhanxing Zhu", "abstract": "Most artificial intelligence models are limited in their ability to solve new tasks faster, without forgetting previously acquired knowledge. The recently emerging paradigm of continual learning aims to solve this issue, in which the model learns various tasks in a sequential fashion. In this work, a novel approach for continual learning is proposed, which searches for the best neural architecture for each incoming task via sophisticatedly designed reinforcement learning strategies. We name it Reinforced Continual Learning. Our method not only has good performance in preventing catastrophic forgetting but also fits new tasks well. The experiments on sequential classification tasks for variants of MNIST and CIFAR-100 datasets demonstrate that the proposed approach outperforms existing continual learning alternatives for deep networks.", "bibtex": "@inproceedings{NEURIPS2018_cee63112,\n author = {Xu, Ju and Zhu, Zhanxing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reinforced Continual Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cee631121c2ec9232f3a2f028ad5c89b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cee631121c2ec9232f3a2f028ad5c89b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cee631121c2ec9232f3a2f028ad5c89b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cee631121c2ec9232f3a2f028ad5c89b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cee631121c2ec9232f3a2f028ad5c89b-Reviews.html", "metareview": "", "pdf_size": 560519, "gs_citation": 469, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9508431037463487209&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Center for Data Science, Peking University; Center for Data Science, Peking University + Beijing Institute of Big Data Research (BIBDR)", "aff_domain": "pku.edu.cn;pku.edu.cn", "email": "pku.edu.cn;pku.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cee631121c2ec9232f3a2f028ad5c89b-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Peking University;Beijing Institute of Big Data Research", "aff_unique_dep": "Center for Data Science;", "aff_unique_url": "http://www.pku.edu.cn;", "aff_unique_abbr": "PKU;BIBDR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0+0", "aff_country_unique": "China" }, { "title": "Reinforcement Learning for Solving the Vehicle Routing Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11934", "id": "11934", "author_site": "MohammadReza Nazari, Afshin Oroojlooy, Lawrence Snyder, Martin Takac", "author": "MohammadReza Nazari; Afshin Oroojlooy; Lawrence Snyder; Martin Takac", "abstract": "We present an end-to-end framework for solving the Vehicle Routing Problem (VRP) using reinforcement learning. In this approach, we train a single policy model that finds near-optimal solutions for a broad range of problem instances of similar size, only by observing the reward signals and following feasibility rules. We consider a parameterized stochastic policy, and by applying a policy gradient algorithm to optimize its parameters, the trained model produces the solution as a sequence of consecutive actions in real time, without the need to re-train for every new problem instance. On capacitated VRP, our approach outperforms classical heuristics and Google's OR-Tools on medium-sized instances in solution quality with comparable computation time (after training). We demonstrate how our approach can handle problems with split delivery and explore the effect of such deliveries on the solution quality. Our proposed framework can be applied to other variants of the VRP such as the stochastic VRP, and has the potential to be applied more generally to combinatorial optimization problems", "bibtex": "@inproceedings{NEURIPS2018_9fb4651c,\n author = {Nazari, MohammadReza and Oroojlooy, Afshin and Snyder, Lawrence and Takac, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reinforcement Learning for Solving the Vehicle Routing Problem},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9fb4651c05b2ed70fba5afe0b039a550-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9fb4651c05b2ed70fba5afe0b039a550-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9fb4651c05b2ed70fba5afe0b039a550-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9fb4651c05b2ed70fba5afe0b039a550-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9fb4651c05b2ed70fba5afe0b039a550-Reviews.html", "metareview": "", "pdf_size": 332651, "gs_citation": 1551, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7637833782499120275&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 19, "aff": "Department of Industrial and Systems Engineering, Lehigh University, Bethlehem, PA 18015; Department of Industrial and Systems Engineering, Lehigh University, Bethlehem, PA 18015; Department of Industrial and Systems Engineering, Lehigh University, Bethlehem, PA 18015; Department of Industrial and Systems Engineering, Lehigh University, Bethlehem, PA 18015", "aff_domain": "lehigh.edu;lehigh.edu;lehigh.edu;lehigh.edu", "email": "lehigh.edu;lehigh.edu;lehigh.edu;lehigh.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9fb4651c05b2ed70fba5afe0b039a550-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Lehigh University", "aff_unique_dep": "Department of Industrial and Systems Engineering", "aff_unique_url": "https://www.lehigh.edu", "aff_unique_abbr": "Lehigh", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Bethlehem", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reinforcement Learning of Theorem Proving", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11842", "id": "11842", "author_site": "Cezary Kaliszyk, Josef Urban, Henryk Michalewski, Miroslav Ol\u0161\u00e1k", "author": "Cezary Kaliszyk; Josef Urban; Henryk Michalewski; Miroslav Ol\u0161\u00e1k", "abstract": "We introduce a theorem proving algorithm that uses practically no domain heuristics for guiding its connection-style proof search. Instead, it runs many Monte-Carlo simulations guided by reinforcement learning from previous proof attempts. We produce several versions of the prover, parameterized by different learning and guiding algorithms. The strongest version of the system is trained on a large corpus of mathematical problems and evaluated on previously unseen problems. The trained system solves within the same number of inferences over 40% more problems than a baseline prover, which is an unusually high improvement in this hard AI domain. To our knowledge this is the first time reinforcement learning has been convincingly applied to solving general mathematical problems on a large scale.", "bibtex": "@inproceedings{NEURIPS2018_55acf853,\n author = {Kaliszyk, Cezary and Urban, Josef and Michalewski, Henryk and Ol\\v{s}\\'{a}k, Miroslav},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reinforcement Learning of Theorem Proving},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/55acf8539596d25624059980986aaa78-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/55acf8539596d25624059980986aaa78-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/55acf8539596d25624059980986aaa78-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/55acf8539596d25624059980986aaa78-Reviews.html", "metareview": "", "pdf_size": 307582, "gs_citation": 202, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2714588860068568498&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "University of Innsbruck; Czech Technical University in Prague; University of Warsaw+Institute of Mathematics of the Polish Academy of Sciences+deepsense.ai; Charles University", "aff_domain": "\u2217; \u2217; ; ", "email": "\u2217; \u2217; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/55acf8539596d25624059980986aaa78-Abstract.html", "aff_unique_index": "0;1;2+3+4;5", "aff_unique_norm": "University of Innsbruck;Czech Technical University;University of Warsaw;Polish Academy of Sciences;deepsense.ai;Charles University", "aff_unique_dep": ";;;Institute of Mathematics;;", "aff_unique_url": "https://www.uibk.ac.at;https://www.ctu.cz;https://www.uw.edu.pl;https://www.impan.pl/;https://deepsense.ai;https://www.cuni.cz", "aff_unique_abbr": "UIBK;CTU;UW;IM PAN;;Charles U", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Prague", "aff_country_unique_index": "0;1;2+2+2;1", "aff_country_unique": "Austria;Czech Republic;Poland" }, { "title": "Reinforcement Learning with Multiple Experts: A Bayesian Model Combination Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11906", "id": "11906", "author_site": "Michael Gimelfarb, Scott Sanner, Chi-Guhn Lee", "author": "Michael Gimelfarb; Scott Sanner; Chi-Guhn Lee", "abstract": "Potential based reward shaping is a powerful technique for accelerating convergence of reinforcement learning algorithms. Typically, such information includes an estimate of the optimal value function and is often provided by a human expert or other sources of domain knowledge. However, this information is often biased or inaccurate and can mislead many reinforcement learning algorithms. In this paper, we apply Bayesian Model Combination with multiple experts in a way that learns to trust a good combination of experts as training progresses. This approach is both computationally efficient and general, and is shown numerically to improve convergence across discrete and continuous domains and different reinforcement learning algorithms.", "bibtex": "@inproceedings{NEURIPS2018_ba4002d8,\n author = {Gimelfarb, Michael and Sanner, Scott and Lee, Chi-Guhn},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reinforcement Learning with Multiple Experts: A Bayesian Model Combination Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ba4002d88b8860b6a684ade8357aba56-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ba4002d88b8860b6a684ade8357aba56-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ba4002d88b8860b6a684ade8357aba56-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ba4002d88b8860b6a684ade8357aba56-Reviews.html", "metareview": "", "pdf_size": 2079496, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7044687294882365829&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Mechanical and Industrial Engineering, University of Toronto; Mechanical and Industrial Engineering, University of Toronto; Mechanical and Industrial Engineering, University of Toronto", "aff_domain": "mail.utoronto.ca;mie.utoronto.ca;mie.utoronto.ca", "email": "mail.utoronto.ca;mie.utoronto.ca;mie.utoronto.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ba4002d88b8860b6a684ade8357aba56-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "Department of Mechanical and Industrial Engineering", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Relating Leverage Scores and Density using Regularized Christoffel Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11180", "id": "11180", "author_site": "Edouard Pauwels, Francis Bach, Jean-Philippe Vert", "author": "Edouard Pauwels; Francis Bach; Jean-Philippe Vert", "abstract": "Statistical leverage scores emerged as a fundamental tool for matrix sketching and column sampling with applications to low rank approximation, regression, random feature learning and quadrature. Yet, the very nature of this quantity is barely understood. Borrowing ideas from the orthogonal polynomial literature, we introduce the regularized Christoffel function associated to a positive definite kernel. This uncovers a variational formulation for leverage scores for kernel methods and allows to elucidate their relationships with the chosen kernel as well as population density. Our main result quantitatively describes a decreasing relation between leverage score and population density for a broad class of kernels on Euclidean spaces. Numerical simulations support our findings.", "bibtex": "@inproceedings{NEURIPS2018_aff16212,\n author = {Pauwels, Edouard and Bach, Francis and Vert, Jean-Philippe},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Relating Leverage Scores and Density using Regularized Christoffel Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aff1621254f7c1be92f64550478c56e6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aff1621254f7c1be92f64550478c56e6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aff1621254f7c1be92f64550478c56e6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aff1621254f7c1be92f64550478c56e6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aff1621254f7c1be92f64550478c56e6-Reviews.html", "metareview": "", "pdf_size": 682299, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2451168604110793904&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aff1621254f7c1be92f64550478c56e6-Abstract.html" }, { "title": "Relational recurrent neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11703", "id": "11703", "author_site": "Adam Santoro, Ryan Faulkner, David Raposo, Jack Rae, Mike Chrzanowski, Theophane Weber, Daan Wierstra, Oriol Vinyals, Razvan Pascanu, Timothy Lillicrap", "author": "Adam Santoro; Ryan Faulkner; David Raposo; Jack Rae; Mike Chrzanowski; Theophane Weber; Daan Wierstra; Oriol Vinyals; Razvan Pascanu; Timothy Lillicrap", "abstract": "Memory-based neural networks model temporal data by leveraging an ability to remember information for long periods. It is unclear, however, whether they also have an ability to perform complex relational reasoning with the information they remember. Here, we first confirm our intuitions that standard memory architectures may struggle at tasks that heavily involve an understanding of the ways in which entities are connected -- i.e., tasks involving relational reasoning. We then improve upon these deficits by using a new memory module -- a Relational Memory Core (RMC) -- which employs multi-head dot product attention to allow memories to interact. Finally, we test the RMC on a suite of tasks that may profit from more capable relational reasoning across sequential information, and show large gains in RL domains (BoxWorld & Mini PacMan), program evaluation, and language modeling, achieving state-of-the-art results on the WikiText-103, Project Gutenberg, and GigaWord datasets.", "bibtex": "@inproceedings{NEURIPS2018_e2eabaf9,\n author = {Santoro, Adam and Faulkner, Ryan and Raposo, David and Rae, Jack and Chrzanowski, Mike and Weber, Theophane and Wierstra, Daan and Vinyals, Oriol and Pascanu, Razvan and Lillicrap, Timothy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Relational recurrent neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e2eabaf96372e20a9e3d4b5f83723a61-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e2eabaf96372e20a9e3d4b5f83723a61-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e2eabaf96372e20a9e3d4b5f83723a61-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e2eabaf96372e20a9e3d4b5f83723a61-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e2eabaf96372e20a9e3d4b5f83723a61-Reviews.html", "metareview": "", "pdf_size": 1299995, "gs_citation": 283, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9409251416505894911&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "DeepMind; DeepMind; DeepMind; DeepMind+CoMPLEX, Computer Science, University College London; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind; DeepMind+CoMPLEX, Computer Science, University College London", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 10, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e2eabaf96372e20a9e3d4b5f83723a61-Abstract.html", "aff_unique_index": "0;0;0;0+1;0;0;0;0;0;0+1", "aff_unique_norm": "DeepMind;University College London", "aff_unique_dep": ";Computer Science", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk", "aff_unique_abbr": "DeepMind;UCL", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;0;0+0;0;0;0;0;0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Removing Hidden Confounding by Experimental Grounding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12029", "id": "12029", "author_site": "Nathan Kallus, Aahlad Puli, Uri Shalit", "author": "Nathan Kallus; Aahlad Manas Puli; Uri Shalit", "abstract": "Observational data is increasingly used as a means for making individual-level causal predictions and intervention recommendations. The foremost challenge of causal inference from observational data is hidden confounding, whose presence cannot be tested in data and can invalidate any causal conclusion. Experimental data does not suffer from confounding but is usually limited in both scope and scale. We introduce a novel method of using limited experimental data to correct the hidden confounding in causal effect models trained on larger observational data, even if the observational data does not fully overlap with the experimental data. Our method makes strictly weaker assumptions than existing approaches, and we prove conditions under which it yields a consistent estimator. We demonstrate our method's efficacy using real-world data from a large educational experiment.", "bibtex": "@inproceedings{NEURIPS2018_566f0ea4,\n author = {Kallus, Nathan and Puli, Aahlad Manas and Shalit, Uri},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Removing Hidden Confounding by Experimental Grounding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/566f0ea4f6c2e947f36795c8f58ba901-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/566f0ea4f6c2e947f36795c8f58ba901-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/566f0ea4f6c2e947f36795c8f58ba901-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/566f0ea4f6c2e947f36795c8f58ba901-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/566f0ea4f6c2e947f36795c8f58ba901-Reviews.html", "metareview": "", "pdf_size": 343445, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10698519977534032225&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Cornell University and Cornell Tech; New York University; Technion", "aff_domain": "cornell.edu;nyu.edu;technion.ac.il", "email": "cornell.edu;nyu.edu;technion.ac.il", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/566f0ea4f6c2e947f36795c8f58ba901-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Cornell University;New York University;Technion - Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.nyu.edu;https://www.technion.ac.il/en/", "aff_unique_abbr": "Cornell;NYU;Technion", "aff_campus_unique_index": "0", "aff_campus_unique": "Ithaca;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Removing the Feature Correlation Effect of Multiplicative Noise", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11085", "id": "11085", "author_site": "Zijun Zhang, Yining Zhang, Zongpeng Li", "author": "Zijun Zhang; Yining Zhang; Zongpeng Li", "abstract": "Multiplicative noise, including dropout, is widely used to regularize deep neural networks (DNNs), and is shown to be effective in a wide range of architectures and tasks. From an information perspective, we consider injecting multiplicative noise into a DNN as training the network to solve the task with noisy information pathways, which leads to the observation that multiplicative noise tends to increase the correlation between features, so as to increase the signal-to-noise ratio of information pathways. However, high feature correlation is undesirable, as it increases redundancy in representations. In this work, we propose non-correlating multiplicative noise (NCMN), which exploits batch normalization to remove the correlation effect in a simple yet effective way. We show that NCMN significantly improves the performance of standard multiplicative noise on image classification tasks, providing a better alternative to dropout for batch-normalized networks. Additionally, we present a unified view of NCMN and shake-shake regularization, which explains the performance gain of the latter.", "bibtex": "@inproceedings{NEURIPS2018_e7b24b11,\n author = {Zhang, Zijun and Zhang, Yining and Li, Zongpeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Removing the Feature Correlation Effect of Multiplicative Noise},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e7b24b112a44fdd9ee93bdf998c6ca0e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e7b24b112a44fdd9ee93bdf998c6ca0e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e7b24b112a44fdd9ee93bdf998c6ca0e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e7b24b112a44fdd9ee93bdf998c6ca0e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e7b24b112a44fdd9ee93bdf998c6ca0e-Reviews.html", "metareview": "", "pdf_size": 474862, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17402472050771179089&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Calgary; University of Calgary; Wuhan University", "aff_domain": "ucalgary.ca;ucalgary.ca;whu.edu.cn", "email": "ucalgary.ca;ucalgary.ca;whu.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e7b24b112a44fdd9ee93bdf998c6ca0e-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Calgary;Wuhan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucalgary.ca;http://www.whu.edu.cn/", "aff_unique_abbr": "U of C;WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;China" }, { "title": "RenderNet: A deep convolutional network for differentiable rendering from 3D shapes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11757", "id": "11757", "author_site": "Thu Nguyen-Phuoc, Chuan Li, Stephen Balaban, Yongliang Yang", "author": "Thu H Nguyen-Phuoc; Chuan Li; Stephen Balaban; Yongliang Yang", "abstract": "Traditional computer graphics rendering pipelines are designed for procedurally generating 2D images from 3D shapes with high performance. The nondifferentiability due to discrete operations (such as visibility computation) makes it hard to explicitly correlate rendering parameters and the resulting image, posing a significant challenge for inverse rendering tasks. Recent work on differentiable rendering achieves differentiability either by designing surrogate gradients for non-differentiable operations or via an approximate but differentiable renderer. These methods, however, are still limited when it comes to handling occlusion, and restricted to particular rendering effects. We present RenderNet, a differentiable rendering convolutional network with a novel projection unit that can render 2D images from 3D shapes. Spatial occlusion and shading calculation are automatically encoded in the network. Our experiments show that RenderNet can successfully learn to implement different shaders, and can be used in inverse rendering tasks to estimate shape, pose, lighting and texture from a single image.", "bibtex": "@inproceedings{NEURIPS2018_68d37435,\n author = {Nguyen-Phuoc, Thu H and Li, Chuan and Balaban, Stephen and Yang, Yongliang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {RenderNet: A deep convolutional network for differentiable rendering from 3D shapes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/68d3743587f71fbaa5062152985aff40-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/68d3743587f71fbaa5062152985aff40-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/68d3743587f71fbaa5062152985aff40-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/68d3743587f71fbaa5062152985aff40-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/68d3743587f71fbaa5062152985aff40-Reviews.html", "metareview": "", "pdf_size": 2838045, "gs_citation": 159, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2520359496499363195&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Bath; Lambda Labs; Lambda Labs; University of Bath", "aff_domain": "bath.ac.uk;lambdalabs.com;lambdalabs.com;cs.bath.ac.uk", "email": "bath.ac.uk;lambdalabs.com;lambdalabs.com;cs.bath.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/68d3743587f71fbaa5062152985aff40-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Bath;Lambda Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.bath.ac.uk;https://lambdalabs.com", "aff_unique_abbr": "Bath;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Reparameterization Gradient for Non-differentiable Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11542", "id": "11542", "author_site": "Wonyeol Lee, Hangyeol Yu, Hongseok Yang", "author": "Wonyeol Lee; Hangyeol Yu; Hongseok Yang", "abstract": "We present a new algorithm for stochastic variational inference that targets at models with non-differentiable densities. One of the key challenges in stochastic variational inference is to come up with a low-variance estimator of the gradient of a variational objective. We tackle the challenge by generalizing the reparameterization trick, one of the most effective techniques for addressing the variance issue for differentiable models, so that the trick works for non-differentiable models as well. Our algorithm splits the space of latent variables into regions where the density of the variables is differentiable, and their boundaries where the density may fail to be differentiable. For each differentiable region, the algorithm applies the standard reparameterization trick and estimates the gradient restricted to the region. For each potentially non-differentiable boundary, it uses a form of manifold sampling and computes the direction for variational parameters that, if followed, would increase the boundary\u2019s contribution to the variational objective. The sum of all the estimates becomes the gradient estimate of our algorithm. Our estimator enjoys the reduced variance of the reparameterization gradient while remaining unbiased even for non-differentiable models. The experiments with our preliminary implementation confirm the benefit of reduced variance and unbiasedness.", "bibtex": "@inproceedings{NEURIPS2018_b096577e,\n author = {Lee, Wonyeol and Yu, Hangyeol and Yang, Hongseok},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reparameterization Gradient for Non-differentiable Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b096577e264d1ebd6b41041f392eec23-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b096577e264d1ebd6b41041f392eec23-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b096577e264d1ebd6b41041f392eec23-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b096577e264d1ebd6b41041f392eec23-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b096577e264d1ebd6b41041f392eec23-Reviews.html", "metareview": "", "pdf_size": 725257, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15564293157719874680&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b096577e264d1ebd6b41041f392eec23-Abstract.html" }, { "title": "Representation Balancing MDPs for Off-policy Policy Evaluation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11272", "id": "11272", "author_site": "Yao Liu, Omer Gottesman, Aniruddh Raghu, Matthieu Komorowski, Aldo Faisal, Finale Doshi-Velez, Emma Brunskill", "author": "Yao Liu; Omer Gottesman; Aniruddh Raghu; Matthieu Komorowski; Aldo A Faisal; Finale Doshi-Velez; Emma Brunskill", "abstract": "We study the problem of off-policy policy evaluation (OPPE) in RL. In contrast to prior work, we consider how to estimate both the individual policy value and average policy value accurately. We draw inspiration from recent work in causal reasoning, and propose a new finite sample generalization error bound for value estimates from MDP models. Using this upper bound as an objective, we develop a learning algorithm of an MDP model with a balanced representation, and show that our approach can yield substantially lower MSE in common synthetic benchmarks and a HIV treatment simulation domain.", "bibtex": "@inproceedings{NEURIPS2018_980ecd05,\n author = {Liu, Yao and Gottesman, Omer and Raghu, Aniruddh and Komorowski, Matthieu and Faisal, Aldo A and Doshi-Velez, Finale and Brunskill, Emma},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Representation Balancing MDPs for Off-policy Policy Evaluation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/980ecd059122ce2e50136bda65c25e07-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/980ecd059122ce2e50136bda65c25e07-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/980ecd059122ce2e50136bda65c25e07-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/980ecd059122ce2e50136bda65c25e07-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/980ecd059122ce2e50136bda65c25e07-Reviews.html", "metareview": "", "pdf_size": 559102, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11325221239507449364&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Stanford University; Harvard University; Cambridge University; Imperial College London; Imperial College London; Harvard University; Stanford University", "aff_domain": "stanford.edu;fas.harvard.edu;gmail.com;gmail.com;imperial.ac.uk;seas.harvard.edu;cs.stanford.edu", "email": "stanford.edu;fas.harvard.edu;gmail.com;gmail.com;imperial.ac.uk;seas.harvard.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/980ecd059122ce2e50136bda65c25e07-Abstract.html", "aff_unique_index": "0;1;2;3;3;1;0", "aff_unique_norm": "Stanford University;Harvard University;University of Cambridge;Imperial College London", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.stanford.edu;https://www.harvard.edu;https://www.cam.ac.uk;https://www.imperial.ac.uk", "aff_unique_abbr": "Stanford;Harvard;Cambridge;ICL", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Stanford;;Cambridge", "aff_country_unique_index": "0;0;1;1;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Representation Learning for Treatment Effect Estimation from Observational Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11271", "id": "11271", "author_site": "Liuyi Yao, Sheng Li, Yaliang Li, Mengdi Huai, Jing Gao, Aidong Zhang", "author": "Liuyi Yao; Sheng Li; Yaliang Li; Mengdi Huai; Jing Gao; Aidong Zhang", "abstract": "Estimating individual treatment effect (ITE) is a challenging problem in causal inference, due to the missing counterfactuals and the selection bias. Existing ITE estimation methods mainly focus on balancing the distributions of control and treated groups, but ignore the local similarity information that is helpful. In this paper, we propose a local similarity preserved individual treatment effect (SITE) estimation method based on deep representation learning. SITE preserves local similarity and balances data distributions simultaneously, by focusing on several hard samples in each mini-batch. Experimental results on synthetic and three real-world datasets demonstrate the advantages of the proposed SITE method, compared with the state-of-the-art ITE estimation methods.", "bibtex": "@inproceedings{NEURIPS2018_a50abba8,\n author = {Yao, Liuyi and Li, Sheng and Li, Yaliang and Huai, Mengdi and Gao, Jing and Zhang, Aidong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Representation Learning for Treatment Effect Estimation from Observational Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a50abba8132a77191791390c3eb19fe7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a50abba8132a77191791390c3eb19fe7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a50abba8132a77191791390c3eb19fe7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a50abba8132a77191791390c3eb19fe7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a50abba8132a77191791390c3eb19fe7-Reviews.html", "metareview": "", "pdf_size": 958281, "gs_citation": 378, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8473125110526248121&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "SUNY at Buffalo; University of Georgia; Tencent Medical AI Lab; SUNY at Buffalo; SUNY at Buffalo; SUNY at Buffalo", "aff_domain": "buffalo.edu;uga.edu;tencent.com;buffalo.edu;buffalo.edu;buffalo.edu", "email": "buffalo.edu;uga.edu;tencent.com;buffalo.edu;buffalo.edu;buffalo.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a50abba8132a77191791390c3eb19fe7-Abstract.html", "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "State University of New York at Buffalo;University of Georgia;Tencent", "aff_unique_dep": ";;Medical AI Lab", "aff_unique_url": "https://www.buffalo.edu;https://www.uga.edu;https://www.tencent.com", "aff_unique_abbr": "SUNY Buffalo;UGA;Tencent", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Buffalo;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Representation Learning of Compositional Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11645", "id": "11645", "author_site": "Marta Avalos, Richard Nock, Cheng Soon Ong, Julien Rouar, Ke Sun", "author": "Marta Avalos; Richard Nock; Cheng Soon Ong; Julien Rouar; Ke Sun", "abstract": "We consider the problem of learning a low dimensional representation for compositional data. Compositional data consists of a collection of nonnegative data that sum to a constant value. Since the parts of the collection are statistically dependent, many standard tools cannot be directly applied. Instead, compositional data must be first transformed before analysis. Focusing on principal component analysis (PCA), we propose an approach that allows low dimensional representation learning directly from the original data. Our approach combines the benefits of the log-ratio transformation from compositional data analysis and exponential family PCA. A key tool in its derivation is a generalization of the scaled Bregman theorem, that relates the perspective transform of a Bregman divergence to the Bregman divergence of a perspective transform and a remainder conformal divergence. Our proposed approach includes a convenient surrogate (upper bound) loss of the exponential family PCA which has an easy to optimize form. We also derive the corresponding form for nonlinear autoencoders. 
Experiments on simulated data and microbiome data show the promise of our method.", "bibtex": "@inproceedings{NEURIPS2018_664dd858,\n author = {Avalos, Marta and Nock, Richard and Ong, Cheng Soon and Rouar, Julien and Sun, Ke},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Representation Learning of Compositional Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/664dd858db942cad06f24ff25df56716-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/664dd858db942cad06f24ff25df56716-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/664dd858db942cad06f24ff25df56716-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/664dd858db942cad06f24ff25df56716-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/664dd858db942cad06f24ff25df56716-Reviews.html", "metareview": "", "pdf_size": 775511, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18206745691304075675&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Universit\u00e9 de Bordeaux; Data61 + the Australian National University; Data61 + the Australian National University; Universit\u00e9 de Bordeaux; Data61", "aff_domain": "u-bordeaux.fr;data61.csiro.au;data61.csiro.au;u-bordeaux.fr;data61.csiro.au", "email": "u-bordeaux.fr;data61.csiro.au;data61.csiro.au;u-bordeaux.fr;data61.csiro.au", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/664dd858db942cad06f24ff25df56716-Abstract.html", "aff_unique_index": "0;1+2;1+2;0;1", "aff_unique_norm": "Universit\u00e9 de Bordeaux;Data61;Australian National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-bordeaux.fr;https://data61.csiro.au;https://www.anu.edu.au", "aff_unique_abbr": "U Bordeaux;Data61;ANU", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;1+1;1+1;0;1", "aff_country_unique": "France;Australia" }, { "title": "Representer Point Selection for Explaining Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11885", "id": "11885", "author_site": "Chih-Kuan Yeh, Joon Kim, Ian En-Hsu Yen, Pradeep Ravikumar", "author": "Chih-Kuan Yeh; Joon Kim; Ian En-Hsu Yen; Pradeep K Ravikumar", "abstract": "We propose to explain the predictions of a deep neural network, by pointing to the set of what we call representer points in the training set, for a given test point prediction. Specifically, we show that we can decompose the pre-activation prediction of a neural network into a linear combination of activations of training points, with the weights corresponding to what we call representer values, which thus capture the importance of that training point on the learned parameters of the network. But it provides a deeper understanding of the network than simply training point influence: with positive representer values corresponding to excitatory training points, and negative values corresponding to inhibitory points, which as we show provides considerably more insight. 
Our method is also much more scalable, allowing for real-time feedback in a manner not feasible with influence functions.", "bibtex": "@inproceedings{NEURIPS2018_8a7129b8,\n author = {Yeh, Chih-Kuan and Kim, Joon and Yen, Ian En-Hsu and Ravikumar, Pradeep K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Representer Point Selection for Explaining Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8a7129b8f3edd95b7d969dfc2c8e9d9d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8a7129b8f3edd95b7d969dfc2c8e9d9d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8a7129b8f3edd95b7d969dfc2c8e9d9d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8a7129b8f3edd95b7d969dfc2c8e9d9d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8a7129b8f3edd95b7d969dfc2c8e9d9d-Reviews.html", "metareview": "", "pdf_size": 4474520, "gs_citation": 314, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3864972995929128028&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8a7129b8f3edd95b7d969dfc2c8e9d9d-Abstract.html" }, { "title": "ResNet with one-neuron hidden layers is a Universal Approximator", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11598", "id": "11598", "author_site": "Hongzhou Lin, Stefanie Jegelka", "author": "Hongzhou Lin; Stefanie Jegelka", "abstract": "We demonstrate that a very deep ResNet with stacked modules that have one neuron per hidden layer and ReLU activation functions can uniformly approximate any Lebesgue integrable function in d dimensions, i.e. \\ell_1(R^d). Due to the identity mapping inherent to ResNets, our network has alternating layers of dimension one and d. This stands in sharp contrast to fully connected networks, which are not universal approximators if their width is the input dimension d [21,11]. Hence, our result implies an increase in representational power for narrow deep networks by the ResNet architecture.", "bibtex": "@inproceedings{NEURIPS2018_03bfc1d4,\n author = {Lin, Hongzhou and Jegelka, Stefanie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {ResNet with one-neuron hidden layers is a Universal Approximator},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/03bfc1d4783966c69cc6aef8247e0103-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/03bfc1d4783966c69cc6aef8247e0103-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/03bfc1d4783966c69cc6aef8247e0103-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/03bfc1d4783966c69cc6aef8247e0103-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/03bfc1d4783966c69cc6aef8247e0103-Reviews.html", "metareview": "", "pdf_size": 752289, "gs_citation": 317, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4714701838562431074&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "MIT; MIT", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/03bfc1d4783966c69cc6aef8247e0103-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Rest-Katyusha: Exploiting the Solution's Structure via Scheduled Restart Schemes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11067", "id": "11067", "author_site": "Junqi Tang, Mohammad Golbabaee, Francis Bach, Mike Davies", "author": "Junqi Tang; Mohammad Golbabaee; Francis Bach; Mike E davies", "abstract": "We propose a structure-adaptive variant of the state-of-the-art stochastic variance-reduced gradient algorithm Katyusha for regularized empirical risk minimization. The proposed method is able to exploit the intrinsic low-dimensional structure of the solution, such as sparsity or low rank which is enforced by a non-smooth regularization, to achieve even faster convergence rate. This provable algorithmic improvement is done by restarting the Katyusha algorithm according to restricted strong-convexity constants. We demonstrate the effectiveness of our approach via numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_39059724,\n author = {Tang, Junqi and Golbabaee, Mohammad and Bach, Francis and davies, Mike E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Rest-Katyusha: Exploiting the Solution\\textquotesingle s Structure via Scheduled Restart Schemes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/39059724f73a9969845dfe4146c5660e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/39059724f73a9969845dfe4146c5660e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/39059724f73a9969845dfe4146c5660e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/39059724f73a9969845dfe4146c5660e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/39059724f73a9969845dfe4146c5660e-Reviews.html", "metareview": "", "pdf_size": 617769, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7980763301102124058&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "School of Engineering, University of Edinburgh, UK; Department of Computer Science, University of Bath, UK; INRIA - ENS, PSL Research University, France; School of Engineering, University of Edinburgh, UK", "aff_domain": "ed.ac.uk;bath.ac.uk;inria.fr;ed.ac.uk", "email": "ed.ac.uk;bath.ac.uk;inria.fr;ed.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/39059724f73a9969845dfe4146c5660e-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Edinburgh;University of Bath;INRIA", "aff_unique_dep": "School of Engineering;Department of Computer Science;ENS, PSL Research University", "aff_unique_url": "https://www.ed.ac.uk;https://www.bath.ac.uk;https://www.inria.fr", "aff_unique_abbr": "Edinburgh;Bath;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;France" }, { "title": "RetGK: Graph Kernels based on Return Probabilities of Random Walks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11394", "id": "11394", "author_site": "Zhen Zhang, Mianzhi Wang, Yijian Xiang, Yan Huang, Arye Nehorai", "author": "Zhen Zhang; Mianzhi Wang; Yijian Xiang; Yan Huang; Arye Nehorai", "abstract": "Graph-structured data arise in wide applications, such as computer vision, bioinformatics, and social networks. Quantifying similarities among graphs is a fundamental problem. In this paper, we develop a framework for computing graph kernels, based on return probabilities of random walks. The advantages of our proposed kernels are that they can effectively exploit various node attributes, while being scalable to large datasets. We conduct extensive graph classification experiments to evaluate our graph kernels. The experimental results show that our graph kernels significantly outperform other state-of-the-art approaches in both accuracy and computational efficiency.", "bibtex": "@inproceedings{NEURIPS2018_7f16109f,\n author = {Zhang, Zhen and Wang, Mianzhi and Xiang, Yijian and Huang, Yan and Nehorai, Arye},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {RetGK: Graph Kernels based on Return Probabilities of Random Walks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7f16109f1619fd7a733daf5a84c708c1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7f16109f1619fd7a733daf5a84c708c1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7f16109f1619fd7a733daf5a84c708c1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7f16109f1619fd7a733daf5a84c708c1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7f16109f1619fd7a733daf5a84c708c1-Reviews.html", "metareview": "", "pdf_size": 622223, "gs_citation": 124, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5491765719094235233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Systems Engineering, Washington University in St. Louis; Department of Electrical and Systems Engineering, Washington University in St. Louis; Department of Electrical and Systems Engineering, Washington University in St. Louis; Department of Electrical and Systems Engineering, Washington University in St. Louis; Department of Electrical and Systems Engineering, Washington University in St. Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu;wustl.edu;wustl.edu", "email": "wustl.edu;wustl.edu;wustl.edu;wustl.edu;wustl.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7f16109f1619fd7a733daf5a84c708c1-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "Department of Electrical and Systems Engineering", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WUSTL", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reversible Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11861", "id": "11861", "author_site": "Matthew MacKay, Paul Vicol, Jimmy Ba, Roger Grosse", "author": "Matthew MacKay; Paul Vicol; Jimmy Ba; Roger B Grosse", "abstract": "Recurrent neural networks (RNNs) provide state-of-the-art performance in processing sequential data but are memory intensive to train, limiting the flexibility of RNN models which can be trained. Reversible RNNs---RNNs for which the hidden-to-hidden transition can be reversed---offer a path to reduce the memory requirements of training, as hidden states need not be stored and instead can be recomputed during backpropagation. We first show that perfectly reversible RNNs, which require no storage of the hidden activations, are fundamentally limited because they cannot forget information from their hidden state. We then provide a scheme for storing a small number of bits in order to allow perfect reversal with forgetting. Our method achieves comparable performance to traditional models while reducing the activation memory cost by a factor of 10--15. 
We extend our technique to attention-based sequence-to-sequence models, where it maintains performance while reducing activation memory cost by a factor of 5--10 in the encoder, and a factor of 10--15 in the decoder.", "bibtex": "@inproceedings{NEURIPS2018_4ff6fa96,\n author = {MacKay, Matthew and Vicol, Paul and Ba, Jimmy and Grosse, Roger B},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reversible Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4ff6fa96179cdc2838e8d8ce64cd10a7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4ff6fa96179cdc2838e8d8ce64cd10a7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4ff6fa96179cdc2838e8d8ce64cd10a7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4ff6fa96179cdc2838e8d8ce64cd10a7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4ff6fa96179cdc2838e8d8ce64cd10a7-Reviews.html", "metareview": "", "pdf_size": 464930, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2936325833713118727&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Toronto; University of Toronto; University of Toronto; University of Toronto", "aff_domain": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4ff6fa96179cdc2838e8d8ce64cd10a7-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Revisiting $(\\epsilon, \\gamma, \\tau)$-similarity learning for domain adaptation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11712", "id": "11712", "author_site": "Sofiane Dhouib, Ievgen Redko", "author": "Sofiane Dhouib; Ievgen Redko", "abstract": "Similarity learning is an active research area in machine learning that tackles the problem of finding a similarity function tailored to an observable data sample in order to achieve efficient classification. This learning scenario has been generally formalized by the means of a $(\\epsilon, \\gamma, \\tau)-$good similarity learning framework in the context of supervised classification and has been shown to have strong theoretical guarantees. In this paper, we propose to extend the theoretical analysis of similarity learning to the domain adaptation setting, a particular situation occurring when the similarity is learned and then deployed on samples following different probability distributions. We give a new definition of an $(\\epsilon, \\gamma)-$good similarity for domain adaptation and prove several results quantifying the performance of a similarity function on a target domain after it has been trained on a source domain. 
We particularly show that if the source distribution dominates the target one, then principally new domain adaptation learning bounds can be proved.", "bibtex": "@inproceedings{NEURIPS2018_781397bc,\n author = {Dhouib, Sofiane and Redko, Ievgen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Revisiting (\\textbackslash epsilon, \\textbackslash gamma, \\textbackslash tau)-similarity learning for domain adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/781397bc0630d47ab531ea850bddcf63-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/781397bc0630d47ab531ea850bddcf63-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/781397bc0630d47ab531ea850bddcf63-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/781397bc0630d47ab531ea850bddcf63-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/781397bc0630d47ab531ea850bddcf63-Reviews.html", "metareview": "", "pdf_size": 935881, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Univ Lyon, INSA-Lyon, Universit\u00e9 Claude Bernard Lyon 1, UJM-Saint Etienne, CNRS, Inserm, CREATIS UMR 5220, U1206, F-69100, LYON, France; Univ Lyon, UJM-Saint-Etienne, CNRS, Institut d Optique Graduate School Laboratoire Hubert Curien UMR 5516, F-42023, Saint-Etienne, France", "aff_domain": "creatis.insa-lyon.fr;univ-st-etienne.fr", "email": "creatis.insa-lyon.fr;univ-st-etienne.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/781397bc0630d47ab531ea850bddcf63-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Claude Bernard Lyon 1;Universite Lyon", "aff_unique_dep": ";Institut d Optique Graduate School", "aff_unique_url": "https://www.ucbl.fr;https://www.univ-lyon.fr", "aff_unique_abbr": "UCBL;Univ Lyon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lyon", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Revisiting Decomposable Submodular Function Minimization with Incidence Relations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11234", "id": "11234", "author_site": "Pan Li, Olgica Milenkovic", "author": "Pan Li; Olgica Milenkovic", "abstract": "We introduce a new approach to decomposable submodular function minimization (DSFM) that exploits incidence relations. Incidence relations describe which variables effectively influence the component functions, and when properly utilized, they allow for improving the convergence rates of DSFM solvers. Our main results include the precise parametrization of the DSFM problem based on incidence relations, the development of new scalable alternating projection and parallel coordinate descent methods, and an accompanying rigorous analysis of their convergence rates.", "bibtex": "@inproceedings{NEURIPS2018_c21002f4,\n author = {Li, Pan and Milenkovic, Olgica},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Revisiting Decomposable Submodular Function Minimization with Incidence Relations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c21002f464c5fc5bee3b98ced83963b8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c21002f464c5fc5bee3b98ced83963b8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c21002f464c5fc5bee3b98ced83963b8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c21002f464c5fc5bee3b98ced83963b8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c21002f464c5fc5bee3b98ced83963b8-Reviews.html", "metareview": "", "pdf_size": 494813, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11168625649110015445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "UIUC; UIUC", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c21002f464c5fc5bee3b98ced83963b8-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Revisiting Multi-Task Learning with ROCK: a Deep Residual Auxiliary Block for Visual Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11148", "id": "11148", "author_site": "Taylor Mordan, Nicolas THOME, Gilles Henaff, Matthieu Cord", "author": "Taylor Mordan; Nicolas THOME; Gilles Henaff; Matthieu Cord", "abstract": "Multi-Task Learning (MTL) is appealing for deep learning regularization. In this paper, we tackle a specific MTL context denoted as primary MTL, where the ultimate goal is to improve the performance of a given primary task by leveraging several other auxiliary tasks. Our main methodological contribution is to introduce ROCK, a new generic multi-modal fusion block for deep learning tailored to the primary MTL context. The ROCK architecture is based on a residual connection, which makes forward prediction explicitly impacted by the intermediate auxiliary representations. The auxiliary predictor's architecture is also specifically designed for our primary MTL context, by incorporating intensive pooling operators for maximizing complementarity of intermediate representations. Extensive experiments on the NYUv2 dataset (object detection with scene classification, depth prediction, and surface normal estimation as auxiliary tasks) validate the relevance of the approach and its superiority to flat MTL approaches. Our method outperforms state-of-the-art object detection models on the NYUv2 dataset by a large margin, and is also able to handle large-scale heterogeneous inputs (real and synthetic images) with missing annotation modalities.", "bibtex": "@inproceedings{NEURIPS2018_7f5d04d1,\n author = {Mordan, Taylor and THOME, Nicolas and Henaff, Gilles and Cord, Matthieu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Revisiting Multi-Task Learning with ROCK: a Deep Residual Auxiliary Block for Visual Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7f5d04d189dfb634e6a85bb9d9adf21e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7f5d04d189dfb634e6a85bb9d9adf21e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7f5d04d189dfb634e6a85bb9d9adf21e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7f5d04d189dfb634e6a85bb9d9adf21e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7f5d04d189dfb634e6a85bb9d9adf21e-Reviews.html", "metareview": "", "pdf_size": 3891537, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18021983458902468618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Sorbonne Universit\u00e9, CNRS, Laboratoire d\u2019Informatique de Paris 6, LIP6+Thales Land and Air Systems; CEDRIC, Conservatoire National des Arts et M\u00e9tiers; Thales Land and Air Systems; Sorbonne Universit\u00e9, CNRS, Laboratoire d\u2019Informatique de Paris 6, LIP6", "aff_domain": "lip6.fr;cnam.fr;fr.thalesgroup.com;lip6.fr", "email": "lip6.fr;cnam.fr;fr.thalesgroup.com;lip6.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7f5d04d189dfb634e6a85bb9d9adf21e-Abstract.html", "aff_unique_index": "0+1;2;1;0", "aff_unique_norm": "Sorbonne Universit\u00e9;Thales;Conservatoire National des Arts et M\u00e9tiers", "aff_unique_dep": "Laboratoire d\u2019Informatique de Paris 6;Land and Air Systems;CEDRIC", "aff_unique_url": "https://www.sorbonne-universite.fr;https://www.thalesgroup.com;https://www.cnam.fr", "aff_unique_abbr": "Sorbonne U;Thales;CNAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "France" }, { "title": "Reward learning from human preferences and demonstrations in Atari", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11768", "id": "11768", "author_site": "Borja Ibarz, Jan Leike, Tobias Pohlen, Geoffrey Irving, Shane Legg, Dario Amodei", "author": "Borja Ibarz; Jan Leike; Tobias Pohlen; Geoffrey Irving; Shane Legg; Dario Amodei", "abstract": "To solve complex real-world problems with reinforcement learning, we cannot rely on manually specified reward functions. Instead, we need humans to communicate an objective to the agent directly. In this work, we combine two approaches to this problem: learning from expert demonstrations and learning from trajectory preferences. We use both to train a deep neural network to model the reward function and use its predicted reward to train a DQN-based deep reinforcement learning agent on 9 Atari games. Our approach beats the imitation learning baseline in 7 games and achieves strictly superhuman performance on 2 games. Additionally, we investigate the fit of the reward model, present some reward hacking problems, and study the effects of noise in the human labels.", "bibtex": "@inproceedings{NEURIPS2018_8cbe9ce2,\n author = {Ibarz, Borja and Leike, Jan and Pohlen, Tobias and Irving, Geoffrey and Legg, Shane and Amodei, Dario},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reward learning from human preferences and demonstrations in Atari},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8cbe9ce23f42628c98f80fa0fac8b19a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8cbe9ce23f42628c98f80fa0fac8b19a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8cbe9ce23f42628c98f80fa0fac8b19a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8cbe9ce23f42628c98f80fa0fac8b19a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8cbe9ce23f42628c98f80fa0fac8b19a-Reviews.html", "metareview": "", "pdf_size": 4403655, "gs_citation": 504, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=476444635873764909&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "DeepMind; DeepMind; DeepMind; OpenAI; DeepMind; OpenAI", "aff_domain": "google.com;google.com;google.com;openai.com;google.com;openai.com", "email": "google.com;google.com;google.com;openai.com;google.com;openai.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8cbe9ce23f42628c98f80fa0fac8b19a-Abstract.html", "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "DeepMind;OpenAI", "aff_unique_dep": ";", "aff_unique_url": "https://deepmind.com;https://openai.com", "aff_unique_abbr": "DeepMind;OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Ridge Regression and Provable Deterministic Ridge Leverage Score Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11255", "id": "11255", "author": "Shannon McCurdy", "abstract": "Ridge leverage scores provide a balance between low-rank approximation and regularization, and are ubiquitous in randomized linear algebra and machine learning. Deterministic algorithms are also of interest in the moderately big data regime, because deterministic algorithms provide interpretability to the practitioner by having no failure probability and always returning the same results. We provide provable guarantees for deterministic column sampling using ridge leverage scores. The matrix sketch returned by our algorithm is a column subset of the original matrix, yielding additional interpretability. Like the randomized counterparts, the deterministic algorithm provides $(1+\\epsilon)$ error column subset selection, $(1+\\epsilon)$ error projection-cost preservation, and an additive-multiplicative spectral bound. We also show that under the assumption of power-law decay of ridge leverage scores, this deterministic algorithm is provably as accurate as randomized algorithms. Lastly, ridge regression is frequently used to regularize ill-posed linear least-squares problems. While ridge regression provides shrinkage for the regression coefficients, many of the coefficients remain small but non-zero. Performing ridge regression with the matrix sketch returned by our algorithm and a particular regularization parameter forces coefficients to zero and has a provable $(1+\\epsilon)$ bound on the statistical risk. 
As such, it is an interesting alternative to elastic net regularization.", "bibtex": "@inproceedings{NEURIPS2018_e1d5be1c,\n author = {McCurdy, Shannon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Ridge Regression and Provable Deterministic Ridge Leverage Score Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e1d5be1c7f2f456670de3d53c7b54f4a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e1d5be1c7f2f456670de3d53c7b54f4a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e1d5be1c7f2f456670de3d53c7b54f4a-Reviews.html", "metareview": "", "pdf_size": 349287, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13689238016366756695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e1d5be1c7f2f456670de3d53c7b54f4a-Abstract.html" }, { "title": "Robot Learning in Homes: Improving Generalization and Reducing Dataset Bias", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11867", "id": "11867", "author_site": "Abhinav Gupta, Adithyavairavan Murali, Dhiraj Prakashchand Gandhi, Lerrel Pinto", "author": "Abhinav Gupta; Adithyavairavan Murali; Dhiraj Prakashchand Gandhi; Lerrel Pinto", "abstract": "Data-driven approaches to solving robotic tasks have gained a lot of traction in recent years. However, most existing policies are trained on large-scale datasets collected in curated lab settings. If we aim to deploy these models in unstructured visual environments like people's homes, they will be unable to cope with the mismatch in data distribution. In such light, we present the first systematic effort in collecting a large dataset for robotic grasping in homes. First, to scale and parallelize data collection, we built a low-cost mobile manipulator assembled for under 3K USD. Second, data collected using low-cost robots suffer from noisy labels due to imperfect execution and calibration errors. To handle this, we develop a framework which factors out the noise as a latent variable. Our model is trained on 28K grasps collected in several houses under an array of different environmental conditions. We evaluate our models by physically executing grasps on a collection of novel objects in multiple unseen homes. The models trained with our home dataset showed a marked improvement of 43.7% over a baseline model trained with data collected in the lab. Our architecture which explicitly models the latent noise in the dataset also performed 10% better than one that did not factor out the noise. We hope this effort inspires the robotics community to look outside the lab and embrace learning-based approaches to handle inaccurate, cheap robots.", "bibtex": "@inproceedings{NEURIPS2018_febefe1c,\n author = {Gupta, Abhinav and Murali, Adithyavairavan and Gandhi, Dhiraj Prakashchand and Pinto, Lerrel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S.
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robot Learning in Homes: Improving Generalization and Reducing Dataset Bias},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/febefe1cc5c87748ea02036dbe9e3d67-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/febefe1cc5c87748ea02036dbe9e3d67-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/febefe1cc5c87748ea02036dbe9e3d67-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/febefe1cc5c87748ea02036dbe9e3d67-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/febefe1cc5c87748ea02036dbe9e3d67-Reviews.html", "metareview": "", "pdf_size": 19561669, "gs_citation": 167, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16746663049539886768&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "The Robotics Institute; The Robotics Institute; The Robotics Institute; The Robotics Institute", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/febefe1cc5c87748ea02036dbe9e3d67-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "The Robotics Institute", "aff_unique_url": "http://www.ri.cmu.edu", "aff_unique_abbr": "RI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Detection of Adversarial Attacks by Modeling the Intrinsic Properties of Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11759", "id": "11759", "author_site": "Zhihao Zheng, Pengyu Hong", "author": "Zhihao Zheng; Pengyu Hong", "abstract": "It has been shown that deep neural network (DNN) based classifiers are vulnerable to human-imperceptible adversarial perturbations which can cause DNN classifiers to output wrong predictions with high confidence. We propose an unsupervised learning approach to detect adversarial inputs without any knowledge of attackers. Our approach tries to capture the intrinsic properties of a DNN classifier and uses them to detect adversarial inputs. The intrinsic properties used in this study are the output distributions of the hidden neurons in a DNN classifier presented with natural images. Our approach can be easily applied to any DNN classifier or combined with other defense strategies to improve robustness. Experimental results show that our approach demonstrates state-of-the-art robustness in defending against black-box and gray-box attacks.", "bibtex": "@inproceedings{NEURIPS2018_e7a425c6,\n author = {Zheng, Zhihao and Hong, Pengyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust Detection of Adversarial Attacks by Modeling the Intrinsic Properties of Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e7a425c6ece20cbc9056f98699b53c6f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e7a425c6ece20cbc9056f98699b53c6f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e7a425c6ece20cbc9056f98699b53c6f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e7a425c6ece20cbc9056f98699b53c6f-Reviews.html", "metareview": "", "pdf_size": 407686, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9374449561976302554&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Brandeis University; Department of Computer Science, Brandeis University", "aff_domain": "brandeis.edu;brandeis.edu", "email": "brandeis.edu;brandeis.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e7a425c6ece20cbc9056f98699b53c6f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Brandeis University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.brandeis.edu", "aff_unique_abbr": "Brandeis", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Robust Hypothesis Testing Using Wasserstein Uncertainty Sets", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11758", "id": "11758", "author_site": "Rui Gao, Liyan Xie, Yao Xie, Huan Xu", "author": "RUI GAO; Liyan Xie; Yao Xie; Huan Xu", "abstract": "We develop a novel computationally efficient and general framework for robust hypothesis testing. The new framework features a new way to construct uncertainty sets under the null and the alternative distributions, which are sets centered around the empirical distribution defined via the Wasserstein metric; thus, our approach is data-driven and free of distributional assumptions. We develop a convex safe approximation of the minimax formulation and show that such an approximation renders a nearly-optimal detector among the family of all possible tests. By exploiting the structure of the least favorable distribution, we also develop a tractable reformulation of such an approximation, whose complexity is independent of the dimension of the observation space and can be nearly sample-size-independent in general. A real-data example using human activity data demonstrates the excellent performance of the new robust detector.", "bibtex": "@inproceedings{NEURIPS2018_a08e32d2,\n author = {GAO, RUI and Xie, Liyan and Xie, Yao and Xu, Huan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust Hypothesis Testing Using Wasserstein Uncertainty Sets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a08e32d2f9a8b78894d964ec7fd4172e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a08e32d2f9a8b78894d964ec7fd4172e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a08e32d2f9a8b78894d964ec7fd4172e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a08e32d2f9a8b78894d964ec7fd4172e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a08e32d2f9a8b78894d964ec7fd4172e-Reviews.html", "metareview": "", "pdf_size": 479709, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16532981562633039964&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "School of Industrial and Systems Engineering, Georgia Institute of Technology; School of Industrial and Systems Engineering, Georgia Institute of Technology; School of Industrial and Systems Engineering, Georgia Institute of Technology; School of Industrial and Systems Engineering, Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;isye.gatech.edu;isye.gatech.edu", "email": "gatech.edu;gatech.edu;isye.gatech.edu;isye.gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a08e32d2f9a8b78894d964ec7fd4172e-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "School of Industrial and Systems Engineering", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Atlanta", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Learning of Fixed-Structure Bayesian Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11973", "id": "11973", "author_site": "Yu Cheng, Ilias Diakonikolas, Daniel Kane, Alistair Stewart", "author": "Yu Cheng; Ilias Diakonikolas; Daniel Kane; Alistair Stewart", "abstract": "We investigate the problem of learning Bayesian networks in a robust model where an $\\epsilon$-fraction of the samples are adversarially corrupted. In this work, we study the fully observable discrete case where the structure of the network is given. Even in this basic setting, previous learning algorithms either run in exponential time or lose dimension-dependent factors in their error guarantees. We provide the first computationally efficient robust learning algorithm for this problem with dimension-independent error guarantees. Our algorithm has near-optimal sample complexity, runs in polynomial time, and achieves error that scales nearly-linearly with the fraction of adversarially corrupted samples. Finally, we show on both synthetic and semi-synthetic data that our algorithm performs well in practice.", "bibtex": "@inproceedings{NEURIPS2018_486fbd76,\n author = {Cheng, Yu and Diakonikolas, Ilias and Kane, Daniel and Stewart, Alistair},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust Learning of Fixed-Structure Bayesian Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/486fbd761bfa5400722324fdc9822adc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/486fbd761bfa5400722324fdc9822adc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/486fbd761bfa5400722324fdc9822adc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/486fbd761bfa5400722324fdc9822adc-Reviews.html", "metareview": "", "pdf_size": 376890, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17777102248300295784&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Computer Science, Duke University; Department of Computer Science, University of Southern California; Department of Computer Science and Engineering, University of California, San Diego; Department of Computer Science, University of Southern California", "aff_domain": "cs.duke.edu;gmail.com;ucsd.edu;gmail.com", "email": "cs.duke.edu;gmail.com;ucsd.edu;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/486fbd761bfa5400722324fdc9822adc-Abstract.html", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Duke University;University of Southern California;University of California, San Diego", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;Department of Computer Science and Engineering", "aff_unique_url": "https://www.duke.edu;https://www.usc.edu;https://www.ucsd.edu", "aff_unique_abbr": "Duke;USC;UCSD", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Los Angeles;San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Robust Subspace Approximation in a Stream", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12010", "id": "12010", "author_site": "Roie Levin, Anish Prasad Sevekari, David Woodruff", "author": "Roie Levin; Anish Prasad Sevekari; David Woodruff", "abstract": "We study robust subspace estimation in the streaming and distributed settings. Given a set of n data points {a", "bibtex": "@inproceedings{NEURIPS2018_f5e53608,\n author = {Levin, Roie and Sevekari, Anish Prasad and Woodruff, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust Subspace Approximation in a Stream},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f5e536083a438cec5b64a4954abc17f1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f5e536083a438cec5b64a4954abc17f1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f5e536083a438cec5b64a4954abc17f1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f5e536083a438cec5b64a4954abc17f1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f5e536083a438cec5b64a4954abc17f1-Reviews.html", "metareview": "", "pdf_size": 302719, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3941534778926320226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Carnegie Mellon University, Pittsburgh, PA 15213; Department of Mathematical Sciences, Carnegie Mellon University, Pittsburgh, PA 15213; Computer Science Department, Carnegie Mellon University, Pittsburgh, PA 15213", "aff_domain": "cs.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;andrew.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f5e536083a438cec5b64a4954abc17f1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Robustness of conditional GANs to noisy labels", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11972", "id": "11972", "author_site": "Kiran Thekumparampil, Ashish Khetan, Zinan Lin, Sewoong Oh", "author": "Kiran K Thekumparampil; Ashish Khetan; Zinan Lin; Sewoong Oh", "abstract": "We study the problem of learning conditional generators from noisy labeled samples, where the labels are corrupted by random noise. A standard training of conditional GANs will not only produce samples with wrong labels, but also generate poor quality samples. We consider two scenarios, depending on whether the noise model is known or not. When the distribution of the noise is known, we introduce a novel architecture which we call Robust Conditional GAN (RCGAN). The main idea is to corrupt the label of the generated sample before feeding to the adversarial discriminator, forcing the generator to produce samples with clean labels. This approach of passing through a matching noisy channel is justified by accompanying multiplicative approximation bounds between the loss of the RCGAN and the distance between the clean real distribution and the generator distribution. This shows that the proposed approach is robust, when used with a carefully chosen discriminator architecture, known as projection discriminator. When the distribution of the noise is not known, we provide an extension of our architecture, which we call RCGAN-U, that learns the noise model simultaneously while training the generator. 
We show experimentally on MNIST and CIFAR-10 datasets that both approaches consistently improve upon baseline approaches, and RCGAN-U closely matches the performance of RCGAN.", "bibtex": "@inproceedings{NEURIPS2018_565e8a41,\n author = {Thekumparampil, Kiran K and Khetan, Ashish and Lin, Zinan and Oh, Sewoong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robustness of conditional GANs to noisy labels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/565e8a413d0562de9ee4378402d2b481-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/565e8a413d0562de9ee4378402d2b481-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/565e8a413d0562de9ee4378402d2b481-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/565e8a413d0562de9ee4378402d2b481-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/565e8a413d0562de9ee4378402d2b481-Reviews.html", "metareview": "", "pdf_size": 803497, "gs_citation": 353, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4597323022745403664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Illinois at Urbana-Champaign; University of Illinois at Urbana-Champaign; Carnegie Mellon University; University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;gmail.com;andrew.cmu.edu;illinois.edu", "email": "illinois.edu;gmail.com;andrew.cmu.edu;illinois.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/565e8a413d0562de9ee4378402d2b481-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.cmu.edu", "aff_unique_abbr": "UIUC;CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SEGA: Variance Reduction via Gradient Sketching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11220", "id": "11220", "author_site": "Filip Hanzely, Konstantin Mishchenko, Peter Richtarik", "author": "Filip Hanzely; Konstantin Mishchenko; Peter Richtarik", "abstract": "We propose a novel randomized first order optimization method---SEGA (SkEtched GrAdient method)---which progressively throughout its iterations builds a variance-reduced estimate of the gradient from random linear measurements (sketches) of the gradient provided at each iteration by an oracle. In each iteration, SEGA updates the current estimate of the gradient through a sketch-and-project operation using the information provided by the latest sketch, and this is subsequently used to compute an unbiased estimate of the true gradient through a random relaxation procedure. This unbiased estimate is then used to perform a gradient step. Unlike standard subspace descent methods, such as coordinate descent, SEGA can be used for optimization problems with a non-separable proximal term. We provide a general convergence analysis and prove linear convergence for strongly convex objectives.
In the special case of coordinate sketches, SEGA can be enhanced with various techniques such as importance sampling, minibatching and acceleration, and its rate is up to a small constant factor identical to the best-known rate of coordinate descent.", "bibtex": "@inproceedings{NEURIPS2018_fc2c7c47,\n author = {Hanzely, Filip and Mishchenko, Konstantin and Richtarik, Peter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SEGA: Variance Reduction via Gradient Sketching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fc2c7c47b918d0c2d792a719dfb602ef-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fc2c7c47b918d0c2d792a719dfb602ef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fc2c7c47b918d0c2d792a719dfb602ef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fc2c7c47b918d0c2d792a719dfb602ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fc2c7c47b918d0c2d792a719dfb602ef-Reviews.html", "metareview": "", "pdf_size": 1462630, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14767558310725730218&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fc2c7c47b918d0c2d792a719dfb602ef-Abstract.html" }, { "title": "SING: Symbol-to-Instrument Neural Generator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11862", "id": "11862", "author_site": "Alexandre Defossez, Neil Zeghidour, Nicolas Usunier, Leon Bottou, Francis Bach", "author": "Alexandre Defossez; Neil Zeghidour; Nicolas Usunier; Leon Bottou; Francis Bach", "abstract": "Recent progress in deep learning for audio synthesis opens\nthe way to models that directly produce the waveform, shifting away\nfrom the traditional paradigm of relying on vocoders or MIDI synthesizers for speech or music generation. Despite\ntheir successes, current state-of-the-art neural audio synthesizers such\nas WaveNet and SampleRNN suffer from prohibitive training and inference times because they are based on\nautoregressive models that generate audio samples one at a time at a rate of 16kHz. In\nthis work, we study the more computationally efficient alternative of generating the waveform frame-by-frame with large strides.\nWe present a lightweight neural audio synthesizer for the original task of generating musical notes given desired instrument, pitch and velocity. 
Our model is trained end-to-end to generate notes from nearly 1000 instruments with a single decoder, thanks to a new loss function that minimizes the distances between the log spectrograms of the generated and target waveforms.\nOn the generalization task of synthesizing notes for pairs of pitch and instrument not seen during training, SING produces audio with significantly improved perceptual quality compared to a state-of-the-art autoencoder based on WaveNet as measured by a Mean Opinion Score (MOS), and is about 32 times faster for training and 2,500 times faster for inference.", "bibtex": "@inproceedings{NEURIPS2018_56dc0997,\n author = {Defossez, Alexandre and Zeghidour, Neil and Usunier, Nicolas and Bottou, Leon and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SING: Symbol-to-Instrument Neural Generator},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/56dc0997d871e9177069bb472574eb29-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/56dc0997d871e9177069bb472574eb29-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/56dc0997d871e9177069bb472574eb29-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/56dc0997d871e9177069bb472574eb29-Reviews.html", "metareview": "", "pdf_size": 910649, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9576037029701279224&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "Facebook AI Research + INRIA / ENS + PSL Research University, Paris, France; Facebook AI Research + LSCP / ENS / EHESS / CNRS + INRIA / PSL Research University, Paris, France; Facebook AI Research, Paris, France; Facebook AI Research, New York, USA; INRIA + \u00c9cole Normale Sup\u00e9rieure + PSL Research University", "aff_domain": "fb.com;fb.com;fb.com;fb.com;ens.fr", "email": "fb.com;fb.com;fb.com;fb.com;ens.fr", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/56dc0997d871e9177069bb472574eb29-Abstract.html", "aff_unique_index": "0+1+2;0+3+1;0;0;1+3+2", "aff_unique_norm": "Meta;INRIA;PSL Research University;\u00c9cole Normale Sup\u00e9rieure", "aff_unique_dep": "Facebook AI Research;;;Laboratoire de Sciences Cognitives et Psycholinguistique", "aff_unique_url": "https://research.facebook.com;https://www.inria.fr;https://www.psl.eu;https://www.ens.psl.eu/", "aff_unique_abbr": "FAIR;INRIA;PSL;ENS", "aff_campus_unique_index": "1;1;1;2;", "aff_campus_unique": ";Paris;New York", "aff_country_unique_index": "0+1+1;0+1+1;1;0;1+1+1", "aff_country_unique": "United States;France" }, { "title": "SLANG: Fast Structured Covariance Approximations for Bayesian Deep Learning with Natural Gradient", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11605", "id": "11605", "author_site": "Aaron Mishkin, Frederik Kunstner, Didrik Nielsen, Mark Schmidt, Mohammad Emtiyaz Khan", "author": "Aaron Mishkin; Frederik Kunstner; Didrik Nielsen; Mark Schmidt; Mohammad Emtiyaz Khan", "abstract": "Uncertainty estimation in large deep-learning models is a computationally challenging\ntask, where it is difficult to form even a Gaussian approximation to the\nposterior distribution.
In such situations, existing methods usually resort to a diagonal\napproximation of the covariance matrix despite the fact that these matrices\nare known to give poor uncertainty estimates. To address this issue, we propose\na new stochastic, low-rank, approximate natural-gradient (SLANG) method for\nvariational inference in large deep models. Our method estimates a \u201cdiagonal\nplus low-rank\u201d structure based solely on back-propagated gradients of the network\nlog-likelihood. This requires strictly fewer gradient computations than methods that\ncompute the gradient of the whole variational objective. Empirical evaluations\non standard benchmarks confirm that SLANG enables faster and more accurate\nestimation of uncertainty than mean-field methods, and performs comparably to\nstate-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_d3157f2f,\n author = {Mishkin, Aaron and Kunstner, Frederik and Nielsen, Didrik and Schmidt, Mark and Khan, Mohammad Emtiyaz},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SLANG: Fast Structured Covariance Approximations for Bayesian Deep Learning with Natural Gradient},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d3157f2f0212a80a5d042c127522a2d5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d3157f2f0212a80a5d042c127522a2d5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d3157f2f0212a80a5d042c127522a2d5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d3157f2f0212a80a5d042c127522a2d5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d3157f2f0212a80a5d042c127522a2d5-Reviews.html", "metareview": "", "pdf_size": 2073403, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16145055537497825367&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of British Columbia; Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne; RIKEN Center for AI Project; University of British Columbia; RIKEN Center for AI Project", "aff_domain": "cs.ubc.ca;epfl.ch;riken.jp;cs.ubc.ca;riken.jp", "email": "cs.ubc.ca;epfl.ch;riken.jp;cs.ubc.ca;riken.jp", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d3157f2f0212a80a5d042c127522a2d5-Abstract.html", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of British Columbia;EPFL;RIKEN", "aff_unique_dep": ";;Center for AI Project", "aff_unique_url": "https://www.ubc.ca;https://www.epfl.ch;https://www.riken.jp/en/", "aff_unique_abbr": "UBC;EPFL;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2", "aff_country_unique": "Canada;Switzerland;Japan" }, { "title": "SLAYER: Spike Layer Error Reassignment in Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11157", "id": "11157", "author_site": "Sumit Bam Shrestha, Garrick Orchard", "author": "Sumit Bam Shrestha; Garrick Orchard", "abstract": "Configuring deep Spiking Neural Networks (SNNs) is an exciting research avenue for low power spike event based computation. However, the spike generation function is non-differentiable and therefore not directly compatible with the standard error backpropagation algorithm.
In this paper, we introduce a new general backpropagation mechanism for learning synaptic weights and axonal delays which overcomes the problem of non-differentiability of the spike function and uses a temporal credit assignment policy for backpropagating error to preceding layers. We describe and release a GPU accelerated software implementation of our method which allows training both fully connected and convolutional neural network (CNN) architectures. Using our software, we compare our method against existing SNN based learning approaches and standard ANN to SNN conversion techniques and show that our method achieves state of the art performance for an SNN on the MNIST, NMNIST, DVS Gesture, and TIDIGITS datasets.", "bibtex": "@inproceedings{NEURIPS2018_82f2b308,\n author = {Shrestha, Sumit Bam and Orchard, Garrick},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SLAYER: Spike Layer Error Reassignment in Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/82f2b308c3b01637c607ce05f52a2fed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/82f2b308c3b01637c607ce05f52a2fed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/82f2b308c3b01637c607ce05f52a2fed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/82f2b308c3b01637c607ce05f52a2fed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/82f2b308c3b01637c607ce05f52a2fed-Reviews.html", "metareview": "", "pdf_size": 859724, "gs_citation": 1009, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16662540729963616832&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Temasek Laboratories @ NUS, National University of Singapore, Singapore, 117411; Temasek Laboratories @ NUS, National University of Singapore, Singapore, 117411", "aff_domain": "nus.edu.sg;nus.edu.sg", "email": "nus.edu.sg;nus.edu.sg", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/82f2b308c3b01637c607ce05f52a2fed-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "Temasek Laboratories", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "NUS", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "title": "SNIPER: Efficient Multi-Scale Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11887", "id": "11887", "author_site": "Bharat Singh, Mahyar Najibi, Larry Davis", "author": "Bharat Singh; Mahyar Najibi; Larry S. Davis", "abstract": "We present SNIPER, an algorithm for performing efficient multi-scale training in instance level visual recognition tasks. Instead of processing every pixel in an image pyramid, SNIPER processes context regions around ground-truth instances (referred to as chips) at the appropriate scale. For background sampling, these context-regions are generated using proposals extracted from a region proposal network trained with a short learning schedule. Hence, the number of chips generated per image during training adaptively changes based on the scene complexity. 
SNIPER only processes 30% more pixels compared to the commonly used single scale training at 800x1333 pixels on the COCO dataset. But it also observes samples from extreme resolutions of the image pyramid, like 1400x2000 pixels. As SNIPER operates on resampled low-resolution chips (512x512 pixels), it can have a batch size as large as 20 on a single GPU even with a ResNet-101 backbone. Therefore it can benefit from batch-normalization during training without the need for synchronizing batch-normalization statistics across GPUs. SNIPER brings training of instance level recognition tasks like object detection closer to the protocol for image classification and suggests that the commonly accepted guideline that it is important to train on high resolution images for instance level visual recognition tasks might not be correct. Our implementation based on Faster-RCNN with a ResNet-101 backbone obtains an mAP of 47.6% on the COCO dataset for bounding box detection and can process 5 images per second during inference with a single GPU. Code is available at https://github.com/MahyarNajibi/SNIPER/ .", "bibtex": "@inproceedings{NEURIPS2018_166cee72,\n author = {Singh, Bharat and Najibi, Mahyar and Davis, Larry S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SNIPER: Efficient Multi-Scale Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/166cee72e93a992007a89b39eb29628b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/166cee72e93a992007a89b39eb29628b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/166cee72e93a992007a89b39eb29628b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/166cee72e93a992007a89b39eb29628b-Reviews.html", "metareview": "", "pdf_size": 2292375, "gs_citation": 693, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15792283057349312488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Maryland, College Park; University of Maryland, College Park; University of Maryland, College Park", "aff_domain": "cs.umd.edu;cs.umd.edu;cs.umd.edu", "email": "cs.umd.edu;cs.umd.edu;cs.umd.edu", "github": "https://github.com/mahyarnajibi/SNIPER/", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/166cee72e93a992007a89b39eb29628b-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www.umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SPIDER: Near-Optimal Non-Convex Optimization via Stochastic Path-Integrated Differential Estimator", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11091", "id": "11091", "author_site": "Cong Fang, Chris Junchi Li, Zhouchen Lin, Tong Zhang", "author": "Cong Fang; Chris Junchi Li; Zhouchen Lin; Tong Zhang", "abstract": "In this paper, we propose a new technique named \\textit{Stochastic Path-Integrated Differential EstimatoR} (SPIDER), which can be used to track many deterministic quantities of interest with significantly reduced computational cost.
\nCombining SPIDER with the method of normalized gradient descent, we propose SPIDER-SFO, which solves non-convex stochastic optimization problems using stochastic gradients only. \nWe provide a few error-bound results on its convergence rates.\nSpecifically, we prove that the SPIDER-SFO algorithm achieves a gradient computation cost of $\\mathcal{O}\\left( \\min( n^{1/2} \\epsilon^{-2}, \\epsilon^{-3} ) \\right)$ to find an $\\epsilon$-approximate first-order stationary point. \nIn addition, we prove that SPIDER-SFO nearly matches the algorithmic lower bound for finding a stationary point under the gradient Lipschitz assumption in the finite-sum setting.\nOur SPIDER technique can be further applied to find an $(\\epsilon, \\mathcal{O}(\\epsilon^{0.5}))$-approximate second-order stationary point at a gradient computation cost of $\\tilde{\\mathcal{O}}\\left( \\min( n^{1/2} \\epsilon^{-2}+\\epsilon^{-2.5}, \\epsilon^{-3} ) \\right)$.", "bibtex": "@inproceedings{NEURIPS2018_1543843a,\n author = {Fang, Cong and Li, Chris Junchi and Lin, Zhouchen and Zhang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SPIDER: Near-Optimal Non-Convex Optimization via Stochastic Path-Integrated Differential Estimator},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1543843a4723ed2ab08e18053ae6dc5b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1543843a4723ed2ab08e18053ae6dc5b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1543843a4723ed2ab08e18053ae6dc5b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1543843a4723ed2ab08e18053ae6dc5b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1543843a4723ed2ab08e18053ae6dc5b-Reviews.html", "metareview": "", "pdf_size": 362022, "gs_citation": 715, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1874588543956354237&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Key Lab. of Machine Intelligence (MoE), School of EECS, Peking University + Tencent AI Lab; Tencent AI Lab; Key Lab. of Machine Intelligence (MoE), School of EECS, Peking University; Tencent AI Lab", "aff_domain": "pku.edu.cn;gmail.com;pku.edu.cn;tongzhang-ml.org", "email": "pku.edu.cn;gmail.com;pku.edu.cn;tongzhang-ml.org", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1543843a4723ed2ab08e18053ae6dc5b-Abstract.html", "aff_unique_index": "0+1;1;0;1", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": "School of EECS;Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "PKU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "China" }, { "title": "Safe Active Learning for Time-Series Modeling with Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11280", "id": "11280", "author_site": "Christoph Zimmer, Mona Meister, Duy Nguyen-Tuong", "author": "Christoph Zimmer; Mona Meister; Duy Nguyen-Tuong", "abstract": "Learning time-series models is useful for many applications, such as simulation\nand forecasting.
In this study, we consider the problem of actively learning time-series models while taking given safety constraints into account. For time-series modeling we employ a Gaussian process with a nonlinear exogenous input structure. The proposed approach generates data appropriate for time series model learning, i.e. input and output trajectories, by dynamically exploring the input space. The approach parametrizes the input trajectory as consecutive trajectory sections, which are determined stepwise given safety requirements and past observations. We analyze the proposed algorithm and evaluate it empirically on a technical application. The results show the effectiveness of our approach in a realistic technical use case.", "bibtex": "@inproceedings{NEURIPS2018_b197ffde,\n author = {Zimmer, Christoph and Meister, Mona and Nguyen-Tuong, Duy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Safe Active Learning for Time-Series Modeling with Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b197ffdef2ddc3308584dce7afa3661b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b197ffdef2ddc3308584dce7afa3661b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b197ffdef2ddc3308584dce7afa3661b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b197ffdef2ddc3308584dce7afa3661b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b197ffdef2ddc3308584dce7afa3661b-Reviews.html", "metareview": "", "pdf_size": 722160, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17529910038349867086&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Bosch Center for Artificial Intelligence, Renningen, Germany; Bosch Center for Artificial Intelligence, Renningen, Germany; Bosch Center for Artificial Intelligence, Renningen, Germany", "aff_domain": "de.bosch.com;de.bosch.com;de.bosch.com", "email": "de.bosch.com;de.bosch.com;de.bosch.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b197ffdef2ddc3308584dce7afa3661b-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Bosch Center for Artificial Intelligence", "aff_unique_dep": "Artificial Intelligence", "aff_unique_url": "https://www.bosch-ai.com", "aff_unique_abbr": "BCAI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Renningen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Sample Efficient Stochastic Gradient Iterative Hard Thresholding Method for Stochastic Sparse Linear Regression with Limited Attribute Observation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11519", "id": "11519", "author_site": "Tomoya Murata, Taiji Suzuki", "author": "Tomoya Murata; Taiji Suzuki", "abstract": "We develop new stochastic gradient methods for efficiently solving sparse linear regression in a partial attribute observation setting, where learners are only allowed to observe a fixed number of actively chosen attributes per example at training and prediction times. 
It is shown that the methods achieve essentially a sample complexity of $O(1/\varepsilon)$ to attain an error of $\varepsilon$ under a variant of the restricted eigenvalue condition, and the rate has better dependency on the problem dimension than existing methods. In particular, if the smallest magnitude of the non-zero components of the optimal solution is not too small, the rate of our proposed {\it Hybrid} algorithm can be boosted to near the minimax optimal sample complexity of {\it full information} algorithms. The core ideas are (i) efficient construction of an unbiased gradient estimator by the iterative usage of the hard thresholding operator for configuring an exploration algorithm; and (ii) an adaptive combination of the exploration and exploitation algorithms for quickly identifying the support of the optimum and efficiently searching the optimal parameter in its support. Experimental results are presented to validate our theoretical findings and the superiority of our proposed methods.", "bibtex": "@inproceedings{NEURIPS2018_c3a690be,\n author = {Murata, Tomoya and Suzuki, Taiji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sample Efficient Stochastic Gradient Iterative Hard Thresholding Method for Stochastic Sparse Linear Regression with Limited Attribute Observation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c3a690be93aa602ee2dc0ccab5b7b67e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c3a690be93aa602ee2dc0ccab5b7b67e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c3a690be93aa602ee2dc0ccab5b7b67e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c3a690be93aa602ee2dc0ccab5b7b67e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c3a690be93aa602ee2dc0ccab5b7b67e-Reviews.html", "metareview": "", "pdf_size": 381667, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12739186283143366419&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "NTT DATA Mathematical Systems Inc., Tokyo, Japan; Department of Mathematical Informatics, Graduate School of Information Science and Technology, The University of Tokyo, Tokyo, Japan + Center for Advanced Intelligence Project, RIKEN, Tokyo, Japan", "aff_domain": "msi.co.jp;mist.i.u-tokyo.ac.jp", "email": "msi.co.jp;mist.i.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c3a690be93aa602ee2dc0ccab5b7b67e-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "NTT DATA Mathematical Systems Inc.;University of Tokyo;RIKEN", "aff_unique_dep": ";Department of Mathematical Informatics, Graduate School of Information Science and Technology;Center for Advanced Intelligence Project", "aff_unique_url": "https://www.ntt-data.com/;https://www.u-tokyo.ac.jp;https://www.riken.jp", "aff_unique_abbr": ";UTokyo;RIKEN", "aff_campus_unique_index": "1+1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0+0", "aff_country_unique": "Japan" }, { "title": "Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11787", "id": "11787", "author_site": "Jacob Buckman, 
Danijar Hafner, George Tucker, Eugene Brevdo, Honglak Lee", "author": "Jacob Buckman; Danijar Hafner; George Tucker; Eugene Brevdo; Honglak Lee", "abstract": "There is growing interest in combining model-free and model-based approaches in reinforcement learning with the goal of achieving the high performance of model-free algorithms with low sample complexity. This is difficult because an imperfect dynamics model can degrade the performance of the learning algorithm, and in sufficiently complex environments, the dynamics model will always be imperfect. As a result, a key challenge is to combine model-based approaches with model-free learning in such a way that errors in the model do not degrade performance. We propose stochastic ensemble value expansion (STEVE), a novel model-based technique that addresses this issue. By dynamically interpolating between model rollouts of various horizon lengths, STEVE ensures that the model is only utilized when doing so does not introduce significant errors. Our approach outperforms model-free baselines on challenging continuous control benchmarks with an order-of-magnitude increase in sample efficiency.", "bibtex": "@inproceedings{NEURIPS2018_f02208a0,\n author = {Buckman, Jacob and Hafner, Danijar and Tucker, George and Brevdo, Eugene and Lee, Honglak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f02208a057804ee16ac72ff4d3cec53b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f02208a057804ee16ac72ff4d3cec53b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f02208a057804ee16ac72ff4d3cec53b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f02208a057804ee16ac72ff4d3cec53b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f02208a057804ee16ac72ff4d3cec53b-Reviews.html", "metareview": "", "pdf_size": 3134703, "gs_citation": 436, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12106658410656872341&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Google Brain, Mountain View, CA, USA; Google Brain, Mountain View, CA, USA; Google Brain, Mountain View, CA, USA; Google Brain, Mountain View, CA, USA; Google Brain, Mountain View, CA, USA", "aff_domain": "gmail.com;danijar.com;google.com;google.com;google.com", "email": "gmail.com;danijar.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f02208a057804ee16ac72ff4d3cec53b-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sanity Checks for Saliency Maps", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11904", "id": "11904", "author_site": "Julius Adebayo, Justin Gilmer, Michael Muelly, Ian Goodfellow, Moritz Hardt, Been Kim", "author": "Julius Adebayo; Justin Gilmer; 
Michael Muelly; Ian Goodfellow; Moritz Hardt; Been Kim", "abstract": "Saliency methods have emerged as a popular tool to highlight features in an input\ndeemed relevant for the prediction of a learned model. Several saliency methods\nhave been proposed, often guided by visual appeal on image data. In this work, we\npropose an actionable methodology to evaluate what kinds of explanations a given\nmethod can and cannot provide. We find that reliance, solely, on visual assessment\ncan be misleading. Through extensive experiments we show that some existing\nsaliency methods are independent both of the model and of the data generating\nprocess. Consequently, methods that fail the proposed tests are inadequate for\ntasks that are sensitive to either data or model, such as, finding outliers in the data,\nexplaining the relationship between inputs and outputs that the model learned,\nand debugging the model. We interpret our findings through an analogy with\nedge detection in images, a technique that requires neither training data nor model.\nTheory in the case of a linear model and a single-layer convolutional neural network\nsupports our experimental findings.", "bibtex": "@inproceedings{NEURIPS2018_294a8ed2,\n author = {Adebayo, Julius and Gilmer, Justin and Muelly, Michael and Goodfellow, Ian and Hardt, Moritz and Kim, Been},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sanity Checks for Saliency Maps},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/294a8ed24b1ad22ec2e7efea049b8737-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/294a8ed24b1ad22ec2e7efea049b8737-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/294a8ed24b1ad22ec2e7efea049b8737-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/294a8ed24b1ad22ec2e7efea049b8737-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/294a8ed24b1ad22ec2e7efea049b8737-Reviews.html", "metareview": "", "pdf_size": 1595599, "gs_citation": 2661, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8767887416569707674&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Google Brain; Google Brain; Google Brain; Google Brain; Google Brain + University of California Berkeley; Google Brain", "aff_domain": "mit.edu;google.com;google.com;google.com;google.com;google.com", "email": "mit.edu;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "https://goo.gl/hBmhDt", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/294a8ed24b1ad22ec2e7efea049b8737-Abstract.html", "aff_unique_index": "0;0;0;0;0+1;0", "aff_unique_norm": "Google;University of California, Berkeley", "aff_unique_dep": "Google Brain;", "aff_unique_url": "https://brain.google.com;https://www.berkeley.edu", "aff_unique_abbr": "Google Brain;UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0+1;0", "aff_campus_unique": "Mountain View;Berkeley", "aff_country_unique_index": "0;0;0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Scalable Coordinated Exploration in Concurrent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11418", "id": "11418", "author_site": "Maria Dimakopoulou, Ian Osband, Benjamin Van Roy", 
"author": "Maria Dimakopoulou; Ian Osband; Benjamin Van Roy", "abstract": "We consider a team of reinforcement learning agents that concurrently operate in a common environment, and we develop an approach to efficient coordinated exploration that is suitable for problems of practical scale. Our approach builds on the seed sampling concept introduced in Dimakopoulou and Van Roy (2018) and on a randomized value function learning algorithm from Osband et al. (2016). We demonstrate that, for simple tabular contexts, the approach is competitive with those previously proposed in Dimakopoulou and Van Roy (2018) and with a higher-dimensional problem and a neural network value function representation, the approach learns quickly with far fewer agents than alternative exploration schemes.", "bibtex": "@inproceedings{NEURIPS2018_1f4fe6a4,\n author = {Dimakopoulou, Maria and Osband, Ian and Van Roy, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable Coordinated Exploration in Concurrent Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1f4fe6a4411edc2ff625888b4093e917-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1f4fe6a4411edc2ff625888b4093e917-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1f4fe6a4411edc2ff625888b4093e917-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1f4fe6a4411edc2ff625888b4093e917-Reviews.html", "metareview": "", "pdf_size": 877217, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5319871067417609300&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Stanford University; Google DeepMind; Stanford University", "aff_domain": "stanford.edu;google.com;stanford.edu", "email": "stanford.edu;google.com;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1f4fe6a4411edc2ff625888b4093e917-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com", "aff_unique_abbr": "Stanford;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Scalable End-to-End Autonomous Vehicle Testing via Rare-event Simulation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11933", "id": "11933", "author_site": "Matthew O'Kelly, Aman Sinha, Hongseok Namkoong, Russ Tedrake, John Duchi", "author": "Matthew O'Kelly; Aman Sinha; Hongseok Namkoong; Russ Tedrake; John C. Duchi", "abstract": "While recent developments in autonomous vehicle (AV) technology highlight substantial progress, we lack tools for rigorous and scalable testing. Real-world testing, the de facto evaluation environment, places the public in danger, and, due to the rare nature of accidents, will require billions of miles in order to statistically validate performance claims. We implement a simulation framework that can test an entire modern autonomous driving system, including, in particular, systems that employ deep-learning perception and control algorithms. 
Using adaptive importance-sampling methods to accelerate rare-event probability evaluation, we estimate the probability of an accident under a base distribution governing standard traffic behavior. We demonstrate our framework on a highway scenario, accelerating system evaluation by 2-20 times over naive Monte Carlo sampling methods and 10-300P times (where P is the number of processors) over real-world testing.", "bibtex": "@inproceedings{NEURIPS2018_653c579e,\n author = {O\\textquotesingle Kelly, Matthew and Sinha, Aman and Namkoong, Hongseok and Tedrake, Russ and Duchi, John C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable End-to-End Autonomous Vehicle Testing via Rare-event Simulation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/653c579e3f9ba5c03f2f2f8cf4512b39-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/653c579e3f9ba5c03f2f2f8cf4512b39-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/653c579e3f9ba5c03f2f2f8cf4512b39-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/653c579e3f9ba5c03f2f2f8cf4512b39-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/653c579e3f9ba5c03f2f2f8cf4512b39-Reviews.html", "metareview": "", "pdf_size": 694991, "gs_citation": 301, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5564001038044175212&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "University of Pennsylvania; Stanford University; Stanford University; Stanford University; Massachusetts Institute of Technology", "aff_domain": "seas.upenn.edu;stanford.edu;stanford.edu;stanford.edu;mit.edu", "email": "seas.upenn.edu;stanford.edu;stanford.edu;stanford.edu;mit.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/653c579e3f9ba5c03f2f2f8cf4512b39-Abstract.html", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "University of Pennsylvania;Stanford University;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.stanford.edu;https://web.mit.edu", "aff_unique_abbr": "UPenn;Stanford;MIT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scalable Hyperparameter Transfer Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11660", "id": "11660", "author_site": "Valerio Perrone, Rodolphe Jenatton, Matthias W Seeger, Cedric Archambeau", "author": "Valerio Perrone; Rodolphe Jenatton; Matthias W Seeger; Cedric Archambeau", "abstract": "Bayesian optimization (BO) is a model-based approach for gradient-free black-box function optimization, such as hyperparameter optimization. Typically, BO relies on conventional Gaussian process (GP) regression, whose algorithmic complexity is cubic in the number of evaluations. As a result, GP-based BO cannot leverage large numbers of past function evaluations, for example, to warm-start related BO runs. 
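The adaptive importance-sampling step just described can be illustrated on a one-dimensional stand-in: a cross-entropy-style adaptation of a Gaussian proposal toward the rare region, followed by an unbiased importance-sampling estimate. The threshold, proposal family, and update rule below are assumptions for illustration, not the paper's simulator.

```python
import numpy as np

rng = np.random.default_rng(3)
t = 4.0                                # rare event {X > t} under the base X ~ N(0, 1)

def logpdf(x, mu):
    """Gaussian log-density with unit variance."""
    return -0.5 * (x - mu) ** 2 - 0.5 * np.log(2 * np.pi)

mu = 0.0                               # proposal mean, adapted toward the rare region
for _ in range(5):                     # cross-entropy-style adaptation rounds
    x = rng.normal(mu, 1.0, size=10_000)
    w = np.exp(logpdf(x, 0.0) - logpdf(x, mu))   # likelihood ratios vs. the base
    elite = x >= np.quantile(x, 0.9)   # push the proposal toward the top decile
    mu = np.average(x[elite], weights=w[elite])

x = rng.normal(mu, 1.0, size=100_000)
w = np.exp(logpdf(x, 0.0) - logpdf(x, mu))
p_hat = np.mean(w * (x > t))           # unbiased IS estimate of P(X > t), ~3.2e-5
```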
We propose a multi-task adaptive Bayesian linear regression model for transfer learning in BO, whose complexity is linear in the function evaluations: one Bayesian linear regression model is associated with each black-box function optimization problem (or task), while transfer learning is achieved by coupling the models through a shared deep neural net. Experiments show that the neural net learns a representation suitable for warm-starting the black-box optimization problems and that BO runs can be accelerated when the target black-box function (e.g., validation loss) is learned together with other related signals (e.g., training loss). The proposed method was found to be at least one order of magnitude faster than methods recently published in the literature.", "bibtex": "@inproceedings{NEURIPS2018_14c879f3,\n author = {Perrone, Valerio and Jenatton, Rodolphe and Seeger, Matthias W and Archambeau, Cedric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable Hyperparameter Transfer Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/14c879f3f5d8ed93a09f6090d77c2cc3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/14c879f3f5d8ed93a09f6090d77c2cc3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/14c879f3f5d8ed93a09f6090d77c2cc3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/14c879f3f5d8ed93a09f6090d77c2cc3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/14c879f3f5d8ed93a09f6090d77c2cc3-Reviews.html", "metareview": "", "pdf_size": 519510, "gs_citation": 190, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1099355500348485889&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Amazon; Amazon; Amazon; Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com", "email": "amazon.com;amazon.com;amazon.com;amazon.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/14c879f3f5d8ed93a09f6090d77c2cc3-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scalable Laplacian K-modes", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11952", "id": "11952", "author_site": "Imtiaz Ziko, Eric Granger, Ismail Ben Ayed", "author": "Imtiaz Ziko; Eric Granger; Ismail Ben Ayed", "abstract": "We advocate Laplacian K-modes for joint clustering and density mode finding,\nand propose a concave-convex relaxation of the problem, which yields a parallel\nalgorithm that scales up to large datasets and high dimensions. We optimize a tight\nbound (auxiliary function) of our relaxation, which, at each iteration, amounts to\ncomputing an independent update for each cluster-assignment variable, with guaranteed convergence. Therefore, our bound optimizer can be trivially distributed\nfor large-scale data sets. 
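A minimal sketch of the per-task Bayesian linear regression idea above, with a fixed random feature map standing in for the shared (learned) neural net; blr_posterior, phi, and the toy tasks are assumptions for illustration.

```python
import numpy as np

rng = np.random.default_rng(4)
W = rng.normal(size=(3, 16))                   # stand-in for a shared, learned net

def phi(x):
    """Shared feature map applied across all tasks."""
    return np.tanh(x @ W)

def blr_posterior(X, y, alpha=1.0, beta=25.0):
    """Closed-form Bayesian linear regression on features: linear in evaluations."""
    P = phi(X)
    A = alpha * np.eye(P.shape[1]) + beta * P.T @ P
    m = beta * np.linalg.solve(A, P.T @ y)     # posterior mean of the head weights
    return m, A

def predict(m, A, Xs, beta=25.0):
    Ps = phi(Xs)
    mu = Ps @ m
    var = 1.0 / beta + np.einsum('ij,ji->i', Ps, np.linalg.solve(A, Ps.T))
    return mu, var

heads = {}                                     # one BLR "head" per task
for task in range(3):
    X = rng.uniform(-1, 1, size=(30, 3))
    y = np.sin(X.sum(axis=1) + task)           # toy task-specific black-box signal
    heads[task] = blr_posterior(X, y)
```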
Furthermore, we show that the density modes can be\nobtained as byproducts of the assignment variables via simple maximum-value\noperations whose additional computational cost is linear in the number of data\npoints. Our formulation does not require storing a full affinity matrix and computing\nits eigenvalue decomposition, neither does it perform expensive projection steps\nand Lagrangian-dual inner iterates for the simplex constraints of each point. Furthermore, unlike mean-shift, our density-mode estimation does not require inner-loop\ngradient-ascent iterates. It has a complexity independent of feature-space\ndimension, yields modes that are valid data points in the input set and is applicable\nto discrete domains as well as arbitrary kernels. We report comprehensive\nexperiments over various data sets, which show that our algorithm yields very\ncompetitive performance in terms of optimization quality (i.e., the value of the\ndiscrete-variable objective at convergence) and clustering accuracy.", "bibtex": "@inproceedings{NEURIPS2018_3d387d26,\n author = {Ziko, Imtiaz and Granger, Eric and Ben Ayed, Ismail},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable Laplacian K-modes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3d387d2612f9027154ed3b99a7427da1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3d387d2612f9027154ed3b99a7427da1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3d387d2612f9027154ed3b99a7427da1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3d387d2612f9027154ed3b99a7427da1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3d387d2612f9027154ed3b99a7427da1-Reviews.html", "metareview": "", "pdf_size": 1474698, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=673736975875501078&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "\u00c9TS Montreal; \u00c9TS Montreal; \u00c9TS Montreal", "aff_domain": "etsmtl.ca; ; ", "email": "etsmtl.ca; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3d387d2612f9027154ed3b99a7427da1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "\u00c9cole de technologie sup\u00e9rieure", "aff_unique_dep": "", "aff_unique_url": "https://www.etsmtl.ca", "aff_unique_abbr": "\u00c9TS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Montreal", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Scalable Robust Matrix Factorization with Nonconvex Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11495", "id": "11495", "author_site": "Quanming Yao, James Kwok", "author": "Quanming Yao; James Kwok", "abstract": "Robust matrix factorization (RMF), which uses the $\ell_1$-loss, often outperforms standard matrix factorization using the $\ell_2$-loss, particularly when outliers are present. The state-of-the-art RMF solver is the RMF-MM algorithm, which, however, cannot utilize data sparsity. Moreover, sometimes even the (convex) $\ell_1$-loss is not robust enough. In this paper, we propose the use of nonconvex loss to enhance robustness. 
To address the resultant difficult optimization problem, we use majorization-minimization (MM) optimization and propose a new MM surrogate. To improve scalability, we exploit data sparsity and optimize the surrogate via its dual with the accelerated proximal gradient algorithm. The resultant algorithm has low time and space complexities and is guaranteed to converge to a critical point. Extensive experiments demonstrate its superiority over the state-of-the-art in terms of both accuracy and scalability.", "bibtex": "@inproceedings{NEURIPS2018_2c3ddf4b,\n author = {Yao, Quanming and Kwok, James},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable Robust Matrix Factorization with Nonconvex Loss},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2c3ddf4bf13852db711dd1901fb517fa-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2c3ddf4bf13852db711dd1901fb517fa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2c3ddf4bf13852db711dd1901fb517fa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2c3ddf4bf13852db711dd1901fb517fa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2c3ddf4bf13852db711dd1901fb517fa-Reviews.html", "metareview": "", "pdf_size": 543800, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=977188123612428430&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Paradigm Inc. Beijing, China + Department of Computer Science and Engineering, Hong Kong University of Science and Technology, Hong Kong; Department of Computer Science and Engineering, Hong Kong University of Science and Technology, Hong Kong", "aff_domain": "4paradigm.com;cse.ust.hk", "email": "4paradigm.com;cse.ust.hk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2c3ddf4bf13852db711dd1901fb517fa-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Paradigm Inc.;Hong Kong University of Science and Technology", "aff_unique_dep": ";Department of Computer Science and Engineering", "aff_unique_url": ";https://www.ust.hk", "aff_unique_abbr": ";HKUST", "aff_campus_unique_index": "0+1;1", "aff_campus_unique": "Beijing;Hong Kong SAR", "aff_country_unique_index": "0+0;0", "aff_country_unique": "China" }, { "title": "Scalable methods for 8-bit training of neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11503", "id": "11503", "author_site": "Ron Banner, Itay Hubara, Elad Hoffer, Daniel Soudry", "author": "Ron Banner; Itay Hubara; Elad Hoffer; Daniel Soudry", "abstract": "Quantized Neural Networks (QNNs) are often used to improve network efficiency during the inference phase, i.e. after the network has been trained. Extensive research in the field suggests many different quantization schemes. Still, the number of bits required, as well as the best quantization scheme, are yet unknown. Our theoretical analysis suggests that most of the training process is robust to substantial precision reduction, and points to only a few specific operations that require higher precision. 
Armed with this knowledge, we quantize the model parameters, activations and layer gradients to 8-bit, leaving at higher precision only the final step in the computation of the weight gradients. Additionally, as QNNs require batch-normalization to be trained at high precision, we introduce Range Batch-Normalization (BN) which has significantly higher tolerance to quantization noise and improved computational complexity. Our simulations show that Range BN is equivalent to the traditional batch norm if a precise scale adjustment, which can be approximated analytically, is applied. To the best of the authors' knowledge, this work is the first to quantize the weights, activations, as well as a substantial volume of the gradients stream, in all layers (including batch normalization) to 8-bit while showing state-of-the-art results over the ImageNet-1K dataset.", "bibtex": "@inproceedings{NEURIPS2018_e82c4b19,\n author = {Banner, Ron and Hubara, Itay and Hoffer, Elad and Soudry, Daniel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable methods for 8-bit training of neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e82c4b19b8151ddc25d4d93baf7b908f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e82c4b19b8151ddc25d4d93baf7b908f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e82c4b19b8151ddc25d4d93baf7b908f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e82c4b19b8151ddc25d4d93baf7b908f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e82c4b19b8151ddc25d4d93baf7b908f-Reviews.html", "metareview": "", "pdf_size": 468929, "gs_citation": 451, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6261172322646700444&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Intel - Artificial Intelligence Products Group; Technion - Israel Institute of Technology; Technion - Israel Institute of Technology; Technion - Israel Institute of Technology", "aff_domain": "intel.com;gmail.com;gmail.com;gmail.com", "email": "intel.com;gmail.com;gmail.com;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e82c4b19b8151ddc25d4d93baf7b908f-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Intel;Technion - Israel Institute of Technology", "aff_unique_dep": "Artificial Intelligence Products Group;", "aff_unique_url": "https://www.intel.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Intel;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Israel" }, { "title": "Scalar Posterior Sampling with Applications", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11738", "id": "11738", "author_site": "Georgios Theocharous, Zheng Wen, Yasin Abbasi Yadkori, Nikos Vlassis", "author": "Georgios Theocharous; Zheng Wen; Yasin Abbasi Yadkori; Nikos Vlassis", "abstract": "We propose a practical non-episodic PSRL algorithm that unlike recent state-of-the-art PSRL algorithms uses a deterministic, model-independent episode switching schedule. 
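The Range BN idea described above can be sketched in a few lines. This forward-only toy is not the paper's implementation: the range-based scale replaces the variance estimate, and the 2*sqrt(2 ln n) calibration constant is our choice so that Gaussian inputs come out near unit scale, standing in for the analytical adjustment the abstract mentions.

```python
import numpy as np

def range_bn_forward(x, gamma=1.0, beta=0.0, eps=1e-5):
    # Forward-only sketch: replace standard BN's variance estimate with a
    # range-based scale. For n Gaussian samples, E[max - min] is roughly
    # 2*sqrt(2*ln(n)) standard deviations, so dividing the range by that
    # constant gives a low-precision-friendly stand-in for the std.
    n = x.shape[0]
    xc = x - x.mean(axis=0)
    scale = (xc.max(axis=0) - xc.min(axis=0)) / (2.0 * np.sqrt(2.0 * np.log(n)))
    return gamma * xc / (scale + eps) + beta

x = np.random.default_rng(5).normal(2.0, 3.0, size=(256, 8))
y = range_bn_forward(x)   # each feature comes out at roughly unit scale
```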
Our algorithm, termed deterministic schedule PSRL (DS-PSRL), is efficient in terms of time, sample, and space complexity. We prove a Bayesian regret bound under mild assumptions. Our result is more generally applicable to multiple parameters and continuous state-action problems. We compare our algorithm with state-of-the-art PSRL algorithms on standard discrete and continuous problems from the literature. Finally, we show how the assumptions of our algorithm are satisfied by a sensible parameterization for a large class of problems in sequential recommendations.", "bibtex": "@inproceedings{NEURIPS2018_c157297d,\n author = {Theocharous, Georgios and Wen, Zheng and Abbasi Yadkori, Yasin and Vlassis, Nikos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalar Posterior Sampling with Applications},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c157297d1a1ff043255bfb18530caaa2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c157297d1a1ff043255bfb18530caaa2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c157297d1a1ff043255bfb18530caaa2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c157297d1a1ff043255bfb18530caaa2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c157297d1a1ff043255bfb18530caaa2-Reviews.html", "metareview": "", "pdf_size": 475628, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16594225581211546497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Adobe Research; Adobe Research; Adobe Research; Net\ufb02ix", "aff_domain": "adobe.com;adobe.com;adobe.com;netflix.com", "email": "adobe.com;adobe.com;adobe.com;netflix.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c157297d1a1ff043255bfb18530caaa2-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Adobe;Netflix", "aff_unique_dep": "Adobe Research;", "aff_unique_url": "https://research.adobe.com;https://www.netflix.com", "aff_unique_abbr": "Adobe;Netflix", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scaling Gaussian Process Regression with Derivatives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11662", "id": "11662", "author_site": "David Eriksson, Kun Dong, Eric Lee, David Bindel, Andrew Wilson", "author": "David Eriksson; Kun Dong; Eric Lee; David Bindel; Andrew G Wilson", "abstract": "Gaussian processes (GPs) with derivatives are useful in many applications, including Bayesian optimization, implicit surface reconstruction, and terrain reconstruction. Fitting a GP to function values and derivatives at $n$ points in $d$ dimensions requires linear solves and log determinants with an ${n(d+1) \times n(d+1)}$ positive definite matrix, leading to prohibitive $\mathcal{O}(n^3d^3)$ computations for standard direct methods. We propose iterative solvers using fast $\mathcal{O}(nd)$ matrix-vector multiplications (MVMs), together with pivoted Cholesky preconditioning that cuts the iterations to convergence by several orders of magnitude, allowing for fast kernel learning and prediction. 
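A toy sketch of the deterministic, model-independent switching schedule described above, on a two-armed Bernoulli stand-in: one posterior sample is drawn per episode, and episodes lengthen deterministically (here, episode k lasts k steps, an illustrative choice, not necessarily the paper's schedule).

```python
import numpy as np

rng = np.random.default_rng(6)
p_true = np.array([0.3, 0.6])                 # toy two-arm Bernoulli "MDP"
a_post, b_post = np.ones(2), np.ones(2)       # Beta posterior per arm

t, k = 0, 0
theta = rng.beta(a_post, b_post)              # initial posterior sample
while t < 1000:
    k += 1                                    # deterministic, model-independent
    for _ in range(k):                        # schedule: episode k lasts k steps
        arm = int(np.argmax(theta))           # act greedily w.r.t. current sample
        r = rng.random() < p_true[arm]
        a_post[arm] += r
        b_post[arm] += 1 - r
        t += 1
    theta = rng.beta(a_post, b_post)          # resample only at episode switches
```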
Our approaches, together with dimensionality reduction, allow us to scale Bayesian optimization with derivatives to high-dimensional problems and large evaluation budgets.", "bibtex": "@inproceedings{NEURIPS2018_c2f32522,\n author = {Eriksson, David and Dong, Kun and Lee, Eric and Bindel, David and Wilson, Andrew G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaling Gaussian Process Regression with Derivatives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c2f32522a84d5e6357e6abac087f1b0b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c2f32522a84d5e6357e6abac087f1b0b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c2f32522a84d5e6357e6abac087f1b0b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c2f32522a84d5e6357e6abac087f1b0b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c2f32522a84d5e6357e6abac087f1b0b-Reviews.html", "metareview": "", "pdf_size": 4760208, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12933093226685125068&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Center for Applied Mathematics, Cornell University; Center for Applied Mathematics, Cornell University; Department of Computer Science, Cornell University; Department of Computer Science, Cornell University; School of Operations Research and Information Engineering, Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu;cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c2f32522a84d5e6357e6abac087f1b0b-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Center for Applied Mathematics", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Scaling provable adversarial defenses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11803", "id": "11803", "author_site": "Eric Wong, Frank Schmidt, Jan Hendrik Metzen, J. Zico Kolter", "author": "Eric Wong; Frank Schmidt; Jan Hendrik Metzen; J. Zico Kolter", "abstract": "Recent work has developed methods for learning deep network classifiers that are \emph{provably} robust to norm-bounded adversarial perturbation; however, these methods are currently only possible for relatively small feedforward networks. In this paper, in an effort to scale these approaches to substantially larger models, we extend previous work in three main directions. First, we present a technique for extending these training procedures to much more general networks, with skip connections (such as ResNets) and general nonlinearities; the approach is fully modular, and can be implemented automatically analogously to automatic differentiation. 
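The MVM-based solver strategy above reduces to: give conjugate gradients a fast matrix-vector product and never factorize the kernel matrix. A minimal sketch follows; the dense lambda stands in for the fast O(nd) MVM, and the pivoted Cholesky preconditioning is omitted for brevity.

```python
import numpy as np

def cg(mvm, b, tol=1e-8, max_iter=200):
    """Conjugate gradients for K x = b using only matrix-vector products."""
    x = np.zeros_like(b)
    r = b - mvm(x)
    p = r.copy()
    rs = r @ r
    for _ in range(max_iter):
        Kp = mvm(p)
        a = rs / (p @ Kp)
        x += a * p
        r -= a * Kp
        rs_new = r @ r
        if np.sqrt(rs_new) < tol:
            break
        p = r + (rs_new / rs) * p
        rs = rs_new
    return x

rng = np.random.default_rng(7)
X = rng.uniform(size=(200, 2))
K = np.exp(-0.5 * ((X[:, None] - X[None]) ** 2).sum(-1) / 0.3 ** 2) + 1e-2 * np.eye(200)
y = rng.normal(size=200)
alpha_hat = cg(lambda v: K @ v, y)   # swap the lambda for a fast structured MVM
```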
Second, in the specific case of $\ell_\infty$ adversarial perturbations and networks with ReLU nonlinearities, we adopt a nonlinear random projection for training, which scales \emph{linearly} in the number of hidden units (previous approaches scaled quadratically). Third, we show how to further improve robust error through cascade models. On both MNIST and CIFAR data sets, we train classifiers that improve substantially on the state of the art in provable robust adversarial error bounds: from 5.8% to 3.1% on MNIST (with $\ell_\infty$ perturbations of $\epsilon=0.1$), and from 80% to 36.4% on CIFAR (with $\ell_\infty$ perturbations of $\epsilon=2/255$).", "bibtex": "@inproceedings{NEURIPS2018_358f9e7b,\n author = {Wong, Eric and Schmidt, Frank and Metzen, Jan Hendrik and Kolter, J. Zico},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaling provable adversarial defenses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/358f9e7be09177c17d0d17ff73584307-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/358f9e7be09177c17d0d17ff73584307-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/358f9e7be09177c17d0d17ff73584307-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/358f9e7be09177c17d0d17ff73584307-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/358f9e7be09177c17d0d17ff73584307-Reviews.html", "metareview": "", "pdf_size": 693087, "gs_citation": 507, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17860970585851528849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Machine Learning Department, Carnegie Mellon University; Bosch Center for Arti\ufb01cial Intelligence; Bosch Center for Arti\ufb01cial Intelligence; Computer Science Department, Carnegie Mellon University + Bosch Center for Arti\ufb01cial Intelligence", "aff_domain": "cs.cmu.edu;de.bosch.com;de.bosch.com;cs.cmu.edu", "email": "cs.cmu.edu;de.bosch.com;de.bosch.com;cs.cmu.edu", "github": "https://github.com/locuslab/convex_adversarial/", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/358f9e7be09177c17d0d17ff73584307-Abstract.html", "aff_unique_index": "0;1;1;0+1", "aff_unique_norm": "Carnegie Mellon University;Bosch Center for Arti\ufb01cial Intelligence", "aff_unique_dep": "Machine Learning Department;Artificial Intelligence", "aff_unique_url": "https://www.cmu.edu;https://www.bosch-ai.com", "aff_unique_abbr": "CMU;BCAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0+1", "aff_country_unique": "United States;Germany" }, { "title": "Scaling the Poisson GLM to massive neural datasets through polynomial approximations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11353", "id": "11353", "author_site": "David Zoltowski, Jonathan Pillow", "author": "David Zoltowski; Jonathan W Pillow", "abstract": "Recent advances in recording technologies have allowed neuroscientists to record simultaneous spiking activity from hundreds to thousands of neurons in multiple brain regions. Such large-scale recordings pose a major challenge to existing statistical methods for neural data analysis. 
Here we develop highly scalable approximate inference methods for Poisson generalized linear models (GLMs) that require only a single pass over the data. Our approach relies on a recently proposed method for obtaining approximate sufficient statistics for GLMs using polynomial approximations [Huggins et al., 2017], which we adapt to the Poisson GLM setting. We focus on inference using quadratic approximations to nonlinear terms in the Poisson GLM log-likelihood with Gaussian priors, for which we derive closed-form solutions to the approximate maximum likelihood and MAP estimates, posterior distribution, and marginal likelihood. We introduce an adaptive procedure to select the polynomial approximation interval and show that the resulting method allows for efficient and accurate inference and regularization of high-dimensional parameters. We use the quadratic estimator to fit a fully-coupled Poisson GLM to spike train data recorded from 831 neurons across five regions of the mouse brain for a duration of 41 minutes, binned at 1 ms resolution. Across all neurons, this model is fit to over 2 billion spike count bins and identifies fine-timescale statistical dependencies between neurons within and across cortical and subcortical areas.", "bibtex": "@inproceedings{NEURIPS2018_3fab5890,\n author = {Zoltowski, David and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaling the Poisson GLM to massive neural datasets through polynomial approximations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3fab5890d8113d0b5a4178201dc842ad-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3fab5890d8113d0b5a4178201dc842ad-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3fab5890d8113d0b5a4178201dc842ad-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3fab5890d8113d0b5a4178201dc842ad-Reviews.html", "metareview": "", "pdf_size": 1331979, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11374903574277212390&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3fab5890d8113d0b5a4178201dc842ad-Abstract.html" }, { "title": "Searching for Efficient Multi-Scale Architectures for Dense Image Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11831", "id": "11831", "author_site": "Liang-Chieh Chen, Maxwell Collins, Yukun Zhu, George Papandreou, Barret Zoph, Florian Schroff, Hartwig Adam, Jonathon Shlens", "author": "Liang-Chieh Chen; Maxwell Collins; Yukun Zhu; George Papandreou; Barret Zoph; Florian Schroff; Hartwig Adam; Jon Shlens", "abstract": "The design of neural network architectures is an important component for achieving state-of-the-art performance with machine learning systems across a broad array of tasks. Much work has endeavored to design and build architectures automatically through clever construction of a search space paired with simple learning algorithms. Recent progress has demonstrated that such meta-learning methods may exceed scalable human-invented architectures on image classification tasks. 
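The single-pass construction described above can be sketched as follows: once exp(z) is replaced by a quadratic a0 + a1 z + a2 z^2 on an interval, the Poisson GLM log-likelihood touches the data only through the sufficient statistics X^T y, X^T 1, and X^T X, and a Gaussian prior yields a closed-form MAP estimate. The least-squares polynomial fit and the fixed interval below are simplifying assumptions (the abstract describes an adaptive interval selection).

```python
import numpy as np

rng = np.random.default_rng(8)
n, d = 5000, 8
X = rng.normal(scale=0.3, size=(n, d))
w_true = rng.normal(size=d)
y = rng.poisson(np.exp(X @ w_true))

# quadratic fit exp(z) ~ a0 + a1*z + a2*z^2 on an interval covering x_i^T w
zs = np.linspace(-2, 2, 200)
a0, a1, a2 = np.linalg.lstsq(np.vander(zs, 3, increasing=True),
                             np.exp(zs), rcond=None)[0]

# single pass over the data: only these sufficient statistics are needed
Sxy = X.T @ y
Sx1 = X.T @ np.ones(n)
Sxx = X.T @ X

# closed-form MAP under a Gaussian prior with precision lam:
#   maximize w^T Sxy - a1 * Sx1^T w - a2 * w^T Sxx w - (lam/2) ||w||^2
lam = 1.0
w_map = np.linalg.solve(2 * a2 * Sxx + lam * np.eye(d), Sxy - a1 * Sx1)
```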
An open question is the degree to which such methods may generalize to new domains. In this work we explore the construction of meta-learning techniques for dense image prediction focused on the tasks of scene parsing, person-part segmentation, and semantic image segmentation. Constructing viable search spaces in this domain is challenging because of the multi-scale representation of visual information and the necessity to operate on high resolution imagery. Based on a survey of techniques in dense image prediction, we construct a recursive search space and demonstrate that even with efficient random search, we can identify architectures that outperform human-invented architectures and achieve state-of-the-art performance on three dense prediction tasks including 82.7% on Cityscapes (street scene parsing), 71.3% on PASCAL-Person-Part (person-part segmentation), and 87.9% on PASCAL VOC 2012 (semantic image segmentation). Additionally, the resulting architecture is more computationally efficient, requiring half the parameters and half the computational cost of previous state-of-the-art systems.", "bibtex": "@inproceedings{NEURIPS2018_c90070e1,\n author = {Chen, Liang-Chieh and Collins, Maxwell and Zhu, Yukun and Papandreou, George and Zoph, Barret and Schroff, Florian and Adam, Hartwig and Shlens, Jon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Searching for Efficient Multi-Scale Architectures for Dense Image Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c90070e1f03e982448983975a0f52d57-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c90070e1f03e982448983975a0f52d57-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c90070e1f03e982448983975a0f52d57-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c90070e1f03e982448983975a0f52d57-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c90070e1f03e982448983975a0f52d57-Reviews.html", "metareview": "", "pdf_size": 676536, "gs_citation": 506, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13756553035520492080&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;;;;;", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c90070e1f03e982448983975a0f52d57-Abstract.html" }, { "title": "See and Think: Disentangling Semantic Scene Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11052", "id": "11052", "author_site": "Shice Liu, YU HU, Yiming Zeng, Qiankun Tang, Beibei Jin, Yinhe Han, Xiaowei Li", "author": "Shice Liu; YU HU; Yiming Zeng; Qiankun Tang; Beibei Jin; Yinhe Han; Xiaowei Li", "abstract": "Semantic scene completion predicts volumetric occupancy and object category of a 3D scene, which helps intelligent agents to understand and interact with the surroundings. In this work, we propose a disentangled framework, sequentially carrying out 2D semantic segmentation, 2D-3D reprojection and 3D semantic scene completion. 
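The random-search recipe above is simple enough to caricature: sample configurations from a search space and keep the best under a scoring function. The tiny space and proxy_score below are invented stand-ins for the paper's recursive search space and for actually training and validating each architecture.

```python
import random

random.seed(9)
SPACE = {
    "rates": [[1, 3, 6], [1, 6, 12], [1, 12, 24]],   # dilation rates per branch
    "op": ["sep_conv_3x3", "conv_1x1", "pool"],
    "depth": [2, 3, 4],
}

def sample_config():
    """Draw one architecture configuration uniformly from the toy space."""
    return {k: random.choice(v) for k, v in SPACE.items()}

def proxy_score(cfg):
    # stand-in for training the sampled architecture and measuring val mIoU
    return random.random()

best = max((sample_config() for _ in range(50)), key=proxy_score)
```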
This three-stage framework has three advantages: (1) explicit semantic segmentation significantly boosts performance; (2) flexible ways of fusing sensor data bring good extensibility; (3) progress in any subtask will promote the holistic performance. Experimental results show that regardless of whether the input is a single depth map or RGB-D, our framework can generate high-quality semantic scene completion and outperforms state-of-the-art approaches on both synthetic and real datasets.", "bibtex": "@inproceedings{NEURIPS2018_9872ed9f,\n author = {Liu, Shice and HU, YU and Zeng, Yiming and Tang, Qiankun and Jin, Beibei and Han, Yinhe and Li, Xiaowei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {See and Think: Disentangling Semantic Scene Completion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9872ed9fc22fc182d371c3e9ed316094-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9872ed9fc22fc182d371c3e9ed316094-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9872ed9fc22fc182d371c3e9ed316094-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9872ed9fc22fc182d371c3e9ed316094-Reviews.html", "metareview": "", "pdf_size": 1438276, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3218225429355211096&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences; State Key Laboratory of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences + University of Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "email": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9872ed9fc22fc182d371c3e9ed316094-Abstract.html", "aff_unique_index": "0+1;0+1;0+1;0+1;0+1;0+1;0+1", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": ";;;;;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0;0+0;0+0;0+0;0+0", "aff_country_unique": "China" }, { "title": "Self-Erasing Network for Integral Object Attention", 
"status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11078", "id": "11078", "author_site": "Qibin Hou, PengTao Jiang, Yunchao Wei, Ming-Ming Cheng", "author": "Qibin Hou; PengTao Jiang; Yunchao Wei; Ming-Ming Cheng", "abstract": "Recently, adversarial erasing for weakly-supervised object attention has been deeply studied due to its capability in localizing integral object regions. However, such a strategy raises one key problem that attention regions will gradually expand to non-object regions as training iterations continue, which significantly decreases the quality of the produced attention maps. To tackle such an issue as well as promote the quality of object attention, we introduce a simple yet effective Self-Erasing Network (SeeNet) to prohibit attentions from spreading to unexpected background regions. In particular, SeeNet leverages two self-erasing strategies to encourage networks to use reliable object and background cues for learning to attention. In this way, integral object regions can be effectively highlighted without including much more background regions. To test the quality of the generated attention maps, we employ the mined object regions as heuristic cues for learning semantic segmentation models. Experiments on Pascal VOC well demonstrate the superiority of our SeeNet over other state-of-the-art methods.", "bibtex": "@inproceedings{NEURIPS2018_c042f4db,\n author = {Hou, Qibin and Jiang, PengTao and Wei, Yunchao and Cheng, Ming-Ming},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Self-Erasing Network for Integral Object Attention},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c042f4db68f23406c6cecf84a7ebb0fe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c042f4db68f23406c6cecf84a7ebb0fe-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c042f4db68f23406c6cecf84a7ebb0fe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c042f4db68f23406c6cecf84a7ebb0fe-Reviews.html", "metareview": "", "pdf_size": 10248218, "gs_citation": 351, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7560584184130809449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Colledge of Computer Science, Nankai University; Colledge of Computer Science, Nankai University; UIUC; Colledge of Computer Science, Nankai University", "aff_domain": "gmail.com; ; ;nankai.edu.cn", "email": "gmail.com; ; ;nankai.edu.cn", "github": "", "project": "http://mmcheng.net/SeeNet/", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c042f4db68f23406c6cecf84a7ebb0fe-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;University of Illinois Urbana-Champaign", "aff_unique_dep": "College of Computer Science;", "aff_unique_url": "http://www.nankai.edu.cn;https://www illinois.edu", "aff_unique_abbr": "Nankai;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Self-Supervised Generation of Spatial Audio for 360\u00b0 Video", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11061", "id": "11061", "author_site": "Pedro Morgado, 
Nuno Nvasconcelos, Timothy Langlois, Oliver Wang", "author": "Pedro Morgado; Nuno Nvasconcelos; Timothy Langlois; Oliver Wang", "abstract": "We introduce an approach to convert mono audio recorded by a 360\u00b0 video camera into spatial audio, a representation of the distribution of sound over the full viewing sphere. Spatial audio is an important component of immersive 360\u00b0 video viewing, but spatial audio microphones are still rare in current 360\u00b0 video production. Our system consists of end-to-end trainable neural networks that separate individual sound sources and localize them on the viewing sphere, conditioned on multi-modal analysis from the audio and 360\u00b0 video frames. We introduce several datasets, including one we filmed ourselves and one collected in the wild from YouTube, consisting of 360\u00b0 videos uploaded with spatial audio. During training, ground truth spatial audio serves as self-supervision and a mixed-down mono track forms the input to our network. Using our approach we show that it is possible to infer the spatial localization of sounds based only on a synchronized 360\u00b0 video and the mono audio track.", "bibtex": "@inproceedings{NEURIPS2018_01161aaa,\n author = {Morgado, Pedro and Nvasconcelos, Nuno and Langlois, Timothy and Wang, Oliver},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Self-Supervised Generation of Spatial Audio for 360\textdegree Video},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/01161aaa0b6d1345dd8fe4e481144d84-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/01161aaa0b6d1345dd8fe4e481144d84-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/01161aaa0b6d1345dd8fe4e481144d84-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/01161aaa0b6d1345dd8fe4e481144d84-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/01161aaa0b6d1345dd8fe4e481144d84-Reviews.html", "metareview": "", "pdf_size": 4464776, "gs_citation": 201, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3401747373664383714&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of California, San Diego; University of California, San Diego; Adobe Research, Seattle; Adobe Research, Seattle", "aff_domain": "eng.ucsd.edu; ; ; ", "email": "eng.ucsd.edu; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/01161aaa0b6d1345dd8fe4e481144d84-Abstract.html", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of California, San Diego;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www.ucsd.edu;https://research.adobe.com", "aff_unique_abbr": "UCSD;Adobe", "aff_campus_unique_index": "0;0;1;1", "aff_campus_unique": "San Diego;Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Semi-Supervised Learning with Declaratively Specified Entropy Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11437", "id": "11437", "author_site": "Haitian Sun, William Cohen, Lidong Bing", "author": "Haitian Sun; William W. 
Cohen; Lidong Bing", "abstract": "We propose a technique for declaratively specifying strategies for semi-supervised learning (SSL). SSL methods based on different assumptions perform differently on different tasks, which leads to difficulties applying them in practice. In this paper, we propose to use entropy to unify many types of constraints. Our method can be used to easily specify ensembles of semi-supervised learners, as well as agreement constraints and entropic regularization constraints between these learners, and can be used to model both well-known heuristics such as co-training, and novel domain-specific heuristics. Besides, our model is flexible as to the underlying learning mechanism. Compared to prior frameworks for specifying SSL techniques, our technique achieves consistent improvements on a suite of well-studied SSL benchmarks, and obtains a new state-of-the-art result on a difficult relation extraction task.", "bibtex": "@inproceedings{NEURIPS2018_12b1e42d,\n author = {Sun, Haitian and Cohen, William W and Bing, Lidong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Semi-Supervised Learning with Declaratively Specified Entropy Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/12b1e42dc0746f22cf361267de07073f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/12b1e42dc0746f22cf361267de07073f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/12b1e42dc0746f22cf361267de07073f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/12b1e42dc0746f22cf361267de07073f-Reviews.html", "metareview": "", "pdf_size": 652110, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11176590584376429911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Machine Learning Department, Carnegie Mellon University; R&D Center Singapore, Machine Intelligence Technology, Alibaba DAMO Academy + Tencent AI Lab; Machine Learning Department, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;alibaba-inc.com;cs.cmu.edu", "email": "cs.cmu.edu;alibaba-inc.com;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/12b1e42dc0746f22cf361267de07073f-Abstract.html", "aff_unique_index": "0;1+2;0", "aff_unique_norm": "Carnegie Mellon University;Alibaba DAMO Academy;Tencent", "aff_unique_dep": "Machine Learning Department;Machine Intelligence Technology;Tencent AI Lab", "aff_unique_url": "https://www.cmu.edu;https://damo.alibaba.com;https://ai.tencent.com", "aff_unique_abbr": "CMU;Alibaba DAMO;Tencent AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Singapore", "aff_country_unique_index": "0;1+2;0", "aff_country_unique": "United States;Singapore;China" }, { "title": "Semi-crowdsourced Clustering with Deep Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11325", "id": "11325", "author_site": "Yucen Luo, TIAN TIAN, Jiaxin Shi, Jun Zhu, Bo Zhang", "author": "Yucen Luo; TIAN TIAN; Jiaxin Shi; Jun Zhu; Bo Zhang", "abstract": "We consider the semi-supervised clustering problem where crowdsourcing provides noisy information about the pairwise comparisons on a small subset of data, i.e., whether a sample pair is in 
the same cluster. We propose a new approach that includes a deep generative model (DGM) to characterize low-level features of the data, and a statistical relational model for noisy pairwise annotations on its subset. The two parts share the latent variables. To make the model automatically trade off between its complexity and its fit to the data, we also develop its fully Bayesian variant. The challenge of inference is addressed by fast (natural-gradient) stochastic variational inference algorithms, where we effectively combine variational message passing for the relational part and amortized learning of the DGM under a unified framework. Empirical results on synthetic and real-world datasets show that our model outperforms previous crowdsourced clustering methods.", "bibtex": "@inproceedings{NEURIPS2018_3c1e4bd6,\n author = {Luo, Yucen and TIAN, TIAN and Shi, Jiaxin and Zhu, Jun and Zhang, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Semi-crowdsourced Clustering with Deep Generative Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3c1e4bd67169b8153e0047536c9f541e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3c1e4bd67169b8153e0047536c9f541e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3c1e4bd67169b8153e0047536c9f541e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3c1e4bd67169b8153e0047536c9f541e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3c1e4bd67169b8153e0047536c9f541e-Reviews.html", "metareview": "", "pdf_size": 1009331, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5214247288739307462&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Dept. of Comp. Sci. & Tech., Institute for AI, THBI Lab, BNRist Center, State Key Lab for Intell. Tech. & Sys., Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., Institute for AI, THBI Lab, BNRist Center, State Key Lab for Intell. Tech. & Sys., Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., Institute for AI, THBI Lab, BNRist Center, State Key Lab for Intell. Tech. & Sys., Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., Institute for AI, THBI Lab, BNRist Center, State Key Lab for Intell. Tech. & Sys., Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., Institute for AI, THBI Lab, BNRist Center, State Key Lab for Intell. Tech. & Sys., Tsinghua University, Beijing, China", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;163.com;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;163.com;mail.tsinghua.edu.cn;mail.tsinghua.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3c1e4bd67169b8153e0047536c9f541e-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Dept. of Comp. Sci.
& Tech.", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Semi-supervised Deep Kernel Learning: Regression with Unlabeled Data by Minimizing Predictive Variance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11520", "id": "11520", "author_site": "Neal Jean, Sang Michael Xie, Stefano Ermon", "author": "Neal Jean; Sang Michael Xie; Stefano Ermon", "abstract": "Large amounts of labeled data are typically required to train deep learning models. For many real-world problems, however, acquiring additional data can be expensive or even impossible. We present semi-supervised deep kernel learning (SSDKL), a semi-supervised regression model based on minimizing predictive variance in the posterior regularization framework. SSDKL combines the hierarchical representation learning of neural networks with the probabilistic modeling capabilities of Gaussian processes. By leveraging unlabeled data, we show improvements on a diverse set of real-world regression tasks over supervised deep kernel learning and semi-supervised methods such as VAT and mean teacher adapted for regression.", "bibtex": "@inproceedings{NEURIPS2018_9d28de8f,\n author = {Jean, Neal and Xie, Sang Michael and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Semi-supervised Deep Kernel Learning: Regression with Unlabeled Data by Minimizing Predictive Variance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Reviews.html", "metareview": "", "pdf_size": 379767, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6491716866958005670&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9d28de8ff9bb6a3fa41fddfdc28f3bc1-Abstract.html" }, { "title": "Semidefinite relaxations for certifying robustness to adversarial examples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12028", "id": "12028", "author_site": "Aditi Raghunathan, Jacob Steinhardt, Percy Liang", "author": "Aditi Raghunathan; Jacob Steinhardt; Percy Liang", "abstract": "Despite their impressive performance on diverse tasks, neural networks fail catastrophically in the presence of adversarial inputs\u2014imperceptibly but adversarially perturbed versions of natural inputs. We have witnessed an arms race between defenders who attempt to train robust networks and attackers who try to construct adversarial examples. 
One promise of ending the arms race is developing certified defenses, ones which are provably robust against all attackers in some family. These certified defenses are based on convex relaxations which construct an upper bound on the worst case loss over all attackers in the family. Previous relaxations are loose on networks that are not trained against the respective relaxation. In this paper, we propose a new semidefinite relaxation for certifying robustness that applies to arbitrary ReLU networks. We show that our proposed relaxation is tighter than previous relaxations and produces meaningful robustness guarantees on three different foreign networks whose training objectives are agnostic to our proposed relaxation.", "bibtex": "@inproceedings{NEURIPS2018_29c0605a,\n author = {Raghunathan, Aditi and Steinhardt, Jacob and Liang, Percy S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Semidefinite relaxations for certifying robustness to adversarial examples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/29c0605a3bab4229e46723f89cf59d83-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/29c0605a3bab4229e46723f89cf59d83-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/29c0605a3bab4229e46723f89cf59d83-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/29c0605a3bab4229e46723f89cf59d83-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/29c0605a3bab4229e46723f89cf59d83-Reviews.html", "metareview": "", "pdf_size": 568572, "gs_citation": 538, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2990782721469246862&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University; Stanford University; Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/29c0605a3bab4229e46723f89cf59d83-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sequence-to-Segment Networks for Segment Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11352", "id": "11352", "author_site": "Zijun Wei, Boyu Wang, Minh Hoai Nguyen, Jianming Zhang, Zhe Lin, Xiaohui Shen, Radomir Mech, Dimitris Samaras", "author": "Zijun Wei; Boyu Wang; Minh Hoai Nguyen; Jianming Zhang; Zhe Lin; Xiaohui Shen; Radomir Mech; Dimitris Samaras", "abstract": "Detecting segments of interest from an input sequence is a challenging problem which often requires not only good knowledge of individual target segments, but also contextual understanding of the entire input sequence and the relationships between the target segments. To address this problem, we propose the Sequence-to-Segment Network (S$^2$N), a novel end-to-end sequential encoder-decoder architecture. 
S$^2$N first encodes the input into a sequence of hidden states that progressively capture both local and holistic information. It then employs a novel decoding architecture, called Segment Detection Unit (SDU), that integrates the decoder state and encoder hidden states to detect segments sequentially. During training, we formulate the assignment of predicted segments to ground truth as bipartite matching and use the Earth Mover's Distance to calculate the localization errors. We experiment with S$^2$N on temporal action proposal generation and video summarization and show that S$^2$N achieves state-of-the-art performance on both tasks.", "bibtex": "@inproceedings{NEURIPS2018_59e0b265,\n author = {Wei, Zijun and Wang, Boyu and Nguyen, Minh Hoai and Zhang, Jianming and Lin, Zhe and Shen, Xiaohui and Mech, Radomir and Samaras, Dimitris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sequence-to-Segment Networks for Segment Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/59e0b2658e9f2e77f8d4d83f8d07ca84-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/59e0b2658e9f2e77f8d4d83f8d07ca84-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/59e0b2658e9f2e77f8d4d83f8d07ca84-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/59e0b2658e9f2e77f8d4d83f8d07ca84-Reviews.html", "metareview": "", "pdf_size": 936151, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=217469821709285374&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": ";;;;;;;", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/59e0b2658e9f2e77f8d4d83f8d07ca84-Abstract.html" }, { "title": "Sequential Attend, Infer, Repeat: Generative Modelling of Moving Objects", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11822", "id": "11822", "author_site": "Adam Kosiorek, Hyunjik Kim, Yee Whye Teh, Ingmar Posner", "author": "Adam Kosiorek; Hyunjik Kim; Yee Whye Teh; Ingmar Posner", "abstract": "We present Sequential Attend, Infer, Repeat (SQAIR), an interpretable deep generative model for image sequences.\nIt can reliably discover and track objects through the sequence; it can also conditionally generate future frames, thereby simulating expected motion of objects. \nThis is achieved by explicitly encoding object numbers, locations and appearances in the latent variables of the model.\nSQAIR retains all strengths of its predecessor, Attend, Infer, Repeat (AIR, Eslami et al.
2016), including unsupervised learning, made possible by inductive biases present in the model structure.\nWe use a moving multi-\\textsc{mnist} dataset to show limitations of AIR in detecting overlapping or partially occluded objects, and show how \\textsc{sqair} overcomes them by leveraging temporal consistency of objects.\nFinally, we also apply SQAIR to real-world pedestrian CCTV data, where it learns to reliably detect, track and generate walking pedestrians with no supervision.", "bibtex": "@inproceedings{NEURIPS2018_7417744a,\n author = {Kosiorek, Adam and Kim, Hyunjik and Teh, Yee Whye and Posner, Ingmar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sequential Attend, Infer, Repeat: Generative Modelling of Moving Objects},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7417744a2bac776fabe5a09b21c707a2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7417744a2bac776fabe5a09b21c707a2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7417744a2bac776fabe5a09b21c707a2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7417744a2bac776fabe5a09b21c707a2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7417744a2bac776fabe5a09b21c707a2-Reviews.html", "metareview": "", "pdf_size": 1764675, "gs_citation": 287, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7430884807828197721&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Applied Artificial Intelligence Lab+Oxford Robotics Institute+University of Oxford; Department of Statistics+University of Oxford; Applied Artificial Intelligence Lab+Oxford Robotics Institute+University of Oxford; Department of Statistics+University of Oxford", "aff_domain": "robots.ox.ac.uk; ; ; ", "email": "robots.ox.ac.uk; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7417744a2bac776fabe5a09b21c707a2-Abstract.html", "aff_unique_index": "0+1+1;2+1;0+1+1;2+1", "aff_unique_norm": "Applied Artificial Intelligence Lab;University of Oxford;University Affiliation Not Specified", "aff_unique_dep": "Artificial Intelligence;Oxford Robotics Institute;Department of Statistics", "aff_unique_url": ";https://www.ox.ac.uk;", "aff_unique_abbr": ";Oxford;", "aff_campus_unique_index": "1;;1;", "aff_campus_unique": ";Oxford", "aff_country_unique_index": "1+1;1;1+1;1", "aff_country_unique": ";United Kingdom" }, { "title": "Sequential Context Encoding for Duplicate Removal", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11217", "id": "11217", "author_site": "Lu Qi, Shu Liu, Jianping Shi, Jiaya Jia", "author": "Lu Qi; Shu Liu; Jianping Shi; Jiaya Jia", "abstract": "Duplicate removal is a critical step for producing a reasonable set of predictions in prevalent proposal-based object detection frameworks. Albeit simple and effective, most previous algorithms utilize a greedy process without making sufficient use of the properties of the input data. In this work, we design a new two-stage framework to effectively select the appropriate proposal candidate for each object. The first stage suppresses most of the easy negative object proposals, while the second stage selects true positives in the reduced proposal set.
These two stages share the same network structure, an encoder and a decoder formed as recurrent neural networks (RNN) with global attention and context gate. The encoder scans proposal candidates in a sequential manner to capture the global context information, which is then fed to the decoder to extract optimal proposals. In our extensive experiments, the proposed method outperforms other alternatives by a large margin.", "bibtex": "@inproceedings{NEURIPS2018_ce5140df,\n author = {Qi, Lu and Liu, Shu and Shi, Jianping and Jia, Jiaya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sequential Context Encoding for Duplicate Removal},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ce5140df15d046a66883807d18d0264b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ce5140df15d046a66883807d18d0264b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ce5140df15d046a66883807d18d0264b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ce5140df15d046a66883807d18d0264b-Reviews.html", "metareview": "", "pdf_size": 764809, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10864880171281918188&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "The Chinese University of Hong Kong; The Chinese University of Hong Kong + SenseTime Research + YouTu Lab, Tencent; SenseTime Research; The Chinese University of Hong Kong + YouTu Lab, Tencent", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;sensetime.com;cse.cuhk.edu.hk", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk;sensetime.com;cse.cuhk.edu.hk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ce5140df15d046a66883807d18d0264b-Abstract.html", "aff_unique_index": "0;0+1+2;1;0+2", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime;Tencent", "aff_unique_dep": ";SenseTime Research;YouTu Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sensetime.com;https://www.tencent.com", "aff_unique_abbr": "CUHK;SenseTime;Tencent", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0+0+0;0;0+0", "aff_country_unique": "China" }, { "title": "Sequential Test for the Lowest Mean: From Thompson to Murphy Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11613", "id": "11613", "author_site": "Emilie Kaufmann, Wouter Koolen, Aur\u00e9lien Garivier", "author": "Emilie Kaufmann; Wouter M. Koolen; Aur\u00e9lien Garivier", "abstract": "Learning the minimum/maximum mean among a finite set of distributions is a fundamental sub-problem in planning, game tree search and reinforcement learning. We formalize this learning task as the problem of sequentially testing how the minimum mean among a finite set of distributions compares to a given threshold. We develop refined non-asymptotic lower bounds, which show that optimality mandates very different sampling behavior for a low vs high true minimum. We show that Thompson Sampling and the intuitive Lower Confidence Bounds policy each nail only one of these cases. We develop a novel approach that we call Murphy Sampling. 
Even though it entertains exclusively low true minima, we prove that MS is optimal for both possibilities. We then design advanced self-normalized deviation inequalities, fueling more aggressive stopping rules. We complement our theoretical guarantees by experiments showing that MS works best in practice.", "bibtex": "@inproceedings{NEURIPS2018_7c78335a,\n author = {Kaufmann, Emilie and Koolen, Wouter M and Garivier, Aur\\'{e}lien},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sequential Test for the Lowest Mean: From Thompson to Murphy Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7c78335a8924215ea5c22fda1aac7b75-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7c78335a8924215ea5c22fda1aac7b75-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7c78335a8924215ea5c22fda1aac7b75-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7c78335a8924215ea5c22fda1aac7b75-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7c78335a8924215ea5c22fda1aac7b75-Reviews.html", "metareview": "", "pdf_size": 515081, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13245284185773199555&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "CNRS & U. Lille, CRIStAL / SequeL Inria Lille; Centrum Wiskunde & Informatica, Amsterdam; UMPA, \u00c9cole normale sup\u00e9rieure de Lyon", "aff_domain": "univ-lille.fr;cwi.nl;ens-lyon.fr", "email": "univ-lille.fr;cwi.nl;ens-lyon.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7c78335a8924215ea5c22fda1aac7b75-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "CNRS;Centrum Wiskunde & Informatica;\u00c9cole Normale Sup\u00e9rieure de Lyon", "aff_unique_dep": ";;UMPA", "aff_unique_url": "https://www.cnrs.fr;https://www.cwi.nl;https://www.ens-lyon.fr", "aff_unique_abbr": "CNRS;CWI;ENS de Lyon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;Netherlands" }, { "title": "Sharp Bounds for Generalized Uniformity Testing", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11601", "id": "11601", "author_site": "Ilias Diakonikolas, Daniel M. Kane, Alistair Stewart", "author": "Ilias Diakonikolas; Daniel M. Kane; Alistair Stewart", "abstract": "We study the problem of generalized uniformity testing of a discrete probability distribution: Given samples from a probability distribution p over an unknown size discrete domain \u2126, we want to distinguish, with probability at least 2/3, between the case that p is uniform on some subset of \u2126 versus \u03b5-far, in total variation distance, from any such uniform distribution. We establish tight bounds on the sample complexity of generalized uniformity testing. In more detail, we present a computationally efficient tester whose sample complexity is optimal, within constant factors, and a matching worst-case information-theoretic lower bound. 
Specifically, we show that the sample complexity of generalized uniformity testing is \u0398(1/(\u03b5^(4/3) ||p||_3) + 1/(\u03b5^2 ||p||_2)).", "bibtex": "@inproceedings{NEURIPS2018_fc325d4b,\n author = {Diakonikolas, Ilias and Kane, Daniel M. and Stewart, Alistair},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sharp Bounds for Generalized Uniformity Testing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fc325d4b598aaede18b53dca4ecfcb9c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fc325d4b598aaede18b53dca4ecfcb9c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fc325d4b598aaede18b53dca4ecfcb9c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fc325d4b598aaede18b53dca4ecfcb9c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fc325d4b598aaede18b53dca4ecfcb9c-Reviews.html", "metareview": "", "pdf_size": 267411, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=339744362543359475&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Southern California; University of California, San Diego; University of Southern California", "aff_domain": "usc.edu;ucsd.edu;gmail.com", "email": "usc.edu;ucsd.edu;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fc325d4b598aaede18b53dca4ecfcb9c-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;University of California, San Diego", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.ucsd.edu", "aff_unique_abbr": "USC;UCSD", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Los Angeles;San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sigsoftmax: Reanalysis of the Softmax Bottleneck", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11054", "id": "11054", "author_site": "Sekitoshi Kanai, Yasuhiro Fujiwara, Yuki Yamanaka, Shuichi Adachi", "author": "Sekitoshi Kanai; Yasuhiro Fujiwara; Yuki Yamanaka; Shuichi Adachi", "abstract": "Softmax is an output activation function for modeling categorical probability distributions in many applications of deep learning. However, a recent study revealed that softmax can be a bottleneck of representational capacity of neural networks in language modeling (the softmax bottleneck). In this paper, we propose an output activation function for breaking the softmax bottleneck without additional parameters. We re-analyze the softmax bottleneck from the perspective of the output set of log-softmax and identify the cause of the softmax bottleneck. On the basis of this analysis, we propose sigsoftmax, which is composed of a multiplication of an exponential function and a sigmoid function. Sigsoftmax can break the softmax bottleneck. The experiments on language modeling demonstrate that sigsoftmax and mixture of sigsoftmax outperform softmax and mixture of softmax, respectively.", "bibtex": "@inproceedings{NEURIPS2018_9dcb88e0,\n author = {Kanai, Sekitoshi and Fujiwara, Yasuhiro and Yamanaka, Yuki and Adachi, Shuichi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H.
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sigsoftmax: Reanalysis of the Softmax Bottleneck},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9dcb88e0137649590b755372b040afad-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9dcb88e0137649590b755372b040afad-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9dcb88e0137649590b755372b040afad-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9dcb88e0137649590b755372b040afad-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9dcb88e0137649590b755372b040afad-Reviews.html", "metareview": "", "pdf_size": 432822, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10623672535862709237&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "NTT Software Innovation Center+Keio Univ.; NTT Software Innovation Center; NTT Secure Platform Laboratories; Keio Univ.", "aff_domain": "lab.ntt.co.jp;lab.ntt.co.jp;lab.ntt.co.jp;appi.keio.ac.jp", "email": "lab.ntt.co.jp;lab.ntt.co.jp;lab.ntt.co.jp;appi.keio.ac.jp", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9dcb88e0137649590b755372b040afad-Abstract.html", "aff_unique_index": "0+1;0;2;1", "aff_unique_norm": "NTT Software Innovation Center;Keio University;NTT Secure Platform Laboratories", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntt-sic.com/;https://www.keio.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": ";Keio;NTT SPL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "Japan" }, { "title": "SimplE Embedding for Link Prediction in Knowledge Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11424", "id": "11424", "author_site": "Seyed Mehran Kazemi, David Poole", "author": "Seyed Mehran Kazemi; David Poole", "abstract": "Knowledge graphs contain knowledge about the world and provide a structured representation of this knowledge. Current knowledge graphs contain only a small subset of what is true in the world. Link prediction approaches aim at predicting new links for a knowledge graph given the existing links among the entities. Tensor factorization approaches have proved promising for such link prediction problems. Proposed in 1927, Canonical Polyadic (CP) decomposition is among the first tensor factorization approaches. CP generally performs poorly for link prediction as it learns two independent embedding vectors for each entity, whereas they are really tied. We present a simple enhancement of CP (which we call SimplE) to allow the two embeddings of each entity to be learned dependently. The complexity of SimplE grows linearly with the size of embeddings. The embeddings learned through SimplE are interpretable, and certain types of background knowledge can be incorporated into these embeddings through weight tying. \nWe prove SimplE is fully expressive and derive a bound on the size of its embeddings for full expressivity. 
\nWe show empirically that, despite its simplicity, SimplE outperforms several state-of-the-art tensor factorization techniques.\nSimplE's code is available on GitHub at https://github.com/Mehran-k/SimplE.", "bibtex": "@inproceedings{NEURIPS2018_b2ab0019,\n author = {Kazemi, Seyed Mehran and Poole, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SimplE Embedding for Link Prediction in Knowledge Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b2ab001909a8a6f04b51920306046ce5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b2ab001909a8a6f04b51920306046ce5-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b2ab001909a8a6f04b51920306046ce5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b2ab001909a8a6f04b51920306046ce5-Reviews.html", "metareview": "", "pdf_size": 469072, "gs_citation": 1041, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1390081697322675650&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of British Columbia, Vancouver, BC, Canada; University of British Columbia, Vancouver, BC, Canada", "aff_domain": "cs.ubc.ca;cs.ubc.ca", "email": "cs.ubc.ca;cs.ubc.ca", "github": "https://github.com/Mehran-k/SimplE", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b2ab001909a8a6f04b51920306046ce5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Vancouver", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Simple random search of static linear policies is competitive for reinforcement learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11193", "id": "11193", "author_site": "Horia Mania, Aurelia Guy, Benjamin Recht", "author": "Horia Mania; Aurelia Guy; Benjamin Recht", "abstract": "Model-free reinforcement learning aims to offer off-the-shelf solutions for controlling dynamical systems without requiring models of the system dynamics. We introduce a model-free random search algorithm for training static, linear policies for continuous control problems. Common evaluation methodology shows that our method matches state-of-the-art sample efficiency on the benchmark MuJoCo locomotion tasks. Nonetheless, more rigorous evaluation reveals that the assessment of performance on these benchmarks is optimistic. We evaluate the performance of our method over hundreds of random seeds and many different hyperparameter configurations for each benchmark task. This extensive evaluation is possible because of the small computational footprint of our method. Our simulations highlight a high variability in performance in these benchmark tasks, indicating that commonly used estimations of sample efficiency do not adequately evaluate the performance of RL algorithms. 
Our results stress the need for new baselines, benchmarks and evaluation methodology for RL algorithms.", "bibtex": "@inproceedings{NEURIPS2018_7634ea65,\n author = {Mania, Horia and Guy, Aurelia and Recht, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Simple random search of static linear policies is competitive for reinforcement learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7634ea65a4e6d9041cfd3f7de18e334a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7634ea65a4e6d9041cfd3f7de18e334a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7634ea65a4e6d9041cfd3f7de18e334a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7634ea65a4e6d9041cfd3f7de18e334a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7634ea65a4e6d9041cfd3f7de18e334a-Reviews.html", "metareview": "", "pdf_size": 994283, "gs_citation": 331, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7785327924982490543&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7634ea65a4e6d9041cfd3f7de18e334a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Simple, Distributed, and Accelerated Probabilistic Programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11730", "id": "11730", "author_site": "Dustin Tran, Matthew Hoffman, Dave Moore, Christopher Suter, Srinivas Vasudevan, Alexey Radul, Matthew Johnson, Rif A. Saurous", "author": "Dustin Tran; Matthew W Hoffman; Dave Moore; Christopher Suter; Srinivas Vasudevan; Alexey Radul", "abstract": "We describe a simple, low-level approach for embedding probabilistic programming in a deep learning ecosystem. In particular, we distill probabilistic programming down to a single abstraction\u2014the random variable. Our lightweight implementation in TensorFlow enables numerous applications: a model-parallel variational auto-encoder (VAE) with 2nd-generation tensor processing units (TPUv2s); a data-parallel autoregressive model (Image Transformer) with TPUv2s; and multi-GPU No-U-Turn Sampler (NUTS). For both a state-of-the-art VAE on 64x64 ImageNet and Image Transformer on 256x256 CelebA-HQ, our approach achieves an optimal linear speedup from 1 to 256 TPUv2 chips. 
With NUTS, we see a 100x speedup on GPUs over Stan and 37x over PyMC3.", "bibtex": "@inproceedings{NEURIPS2018_201e5bac,\n author = {Tran, Dustin and Hoffman, Matthew W and Moore, Dave and Suter, Christopher and Vasudevan, Srinivas and Radul, Alexey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Simple, Distributed, and Accelerated Probabilistic Programming},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/201e5bacd665709851b77148e225b332-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/201e5bacd665709851b77148e225b332-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/201e5bacd665709851b77148e225b332-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/201e5bacd665709851b77148e225b332-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/201e5bacd665709851b77148e225b332-Reviews.html", "metareview": "", "pdf_size": 605661, "gs_citation": 88, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13609598957162867163&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/201e5bacd665709851b77148e225b332-Abstract.html" }, { "title": "Single-Agent Policy Tree Search With Guarantees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11324", "id": "11324", "author_site": "Laurent Orseau, Levi Lelis, Tor Lattimore, Theophane Weber", "author": "Laurent Orseau; Levi Lelis; Tor Lattimore; Theophane Weber", "abstract": "We introduce two novel tree search algorithms that use a policy to guide\nsearch. The first algorithm is a best-first enumeration that uses a cost\nfunction that allows us to provide an upper bound on the number of nodes\nto be expanded before reaching a goal state. We show that this best-first\nalgorithm is particularly well suited for ``needle-in-a-haystack'' problems.\nThe second algorithm, which is based on sampling, provides an\nupper bound on the expected number of nodes to be expanded before\nreaching a set of goal states. We show that this algorithm is better\nsuited for problems where many paths lead to a goal. We validate these tree\nsearch algorithms on 1,000 computer-generated levels of Sokoban, where the\npolicy used to guide search comes from a neural network trained using A3C. Our\nresults show that the policy tree search algorithms we introduce are\ncompetitive with a state-of-the-art domain-independent planner that uses\nheuristic search.", "bibtex": "@inproceedings{NEURIPS2018_52c51893,\n author = {Orseau, Laurent and Lelis, Levi and Lattimore, Tor and Weber, Theophane},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Single-Agent Policy Tree Search With Guarantees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/52c5189391854c93e8a0e1326e56c14f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/52c5189391854c93e8a0e1326e56c14f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/52c5189391854c93e8a0e1326e56c14f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/52c5189391854c93e8a0e1326e56c14f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/52c5189391854c93e8a0e1326e56c14f-Reviews.html", "metareview": "", "pdf_size": 447793, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17454634556201960088&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "DeepMind, London, UK; Universidade Federal de Vi\u00e7osa, Brazil + University of Alberta, Canada; DeepMind, London, UK; DeepMind, London, UK", "aff_domain": "google.com;ufv.br;google.com;google.com", "email": "google.com;ufv.br;google.com;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/52c5189391854c93e8a0e1326e56c14f-Abstract.html", "aff_unique_index": "0;1+2;0;0", "aff_unique_norm": "DeepMind;Universidade Federal de Vi\u00e7osa;University of Alberta", "aff_unique_dep": ";;", "aff_unique_url": "https://deepmind.com;http://www.ufv.br;https://www.ualberta.ca", "aff_unique_abbr": "DeepMind;UFV;UAlberta", "aff_campus_unique_index": "0;;0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;1+2;0;0", "aff_country_unique": "United Kingdom;Brazil;Canada" }, { "title": "Size-Noise Tradeoffs in Generative Networks", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11627", "id": "11627", "author_site": "Bolton Bailey, Matus Telgarsky", "author": "Bolton Bailey; Matus J Telgarsky", "abstract": "This paper investigates the ability of generative networks to convert their input noise distributions into other distributions. Firstly, we demonstrate a construction that allows ReLU networks to increase the dimensionality of their noise distribution by implementing a ``space-filling'' function based on iterated tent maps. We show this construction is optimal by analyzing the number of affine pieces in functions computed by multivariate ReLU networks. Secondly, we provide efficient ways (using polylog$(1/\\epsilon)$ nodes) for networks to pass between univariate uniform and normal distributions, using a Taylor series approximation and a binary search gadget for computing function inverses. Lastly, we indicate how high dimensional distributions can be efficiently transformed into low dimensional distributions.", "bibtex": "@inproceedings{NEURIPS2018_9bd5ee6f,\n author = {Bailey, Bolton and Telgarsky, Matus J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Size-Noise Tradeoffs in Generative Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9bd5ee6fe55aaeb673025dbcb8f939c1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9bd5ee6fe55aaeb673025dbcb8f939c1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9bd5ee6fe55aaeb673025dbcb8f939c1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9bd5ee6fe55aaeb673025dbcb8f939c1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9bd5ee6fe55aaeb673025dbcb8f939c1-Reviews.html", "metareview": "", "pdf_size": 518992, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9887813113199196630&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Illinois, Urbana-Champaign; University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9bd5ee6fe55aaeb673025dbcb8f939c1-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sketching Method for Large Scale Combinatorial Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12002", "id": "12002", "author_site": "Wei Sun, Junwei Lu, Han Liu", "author": "Wei Sun; Junwei Lu; Han Liu", "abstract": "We present computationally efficient algorithms to test various combinatorial structures of large-scale graphical models. In order to test the hypotheses on their topological structures, we propose two adjacency matrix sketching frameworks: neighborhood sketching and subgraph sketching. The neighborhood sketching algorithm is proposed to test the connectivity of graphical models. This algorithm randomly subsamples vertices and conducts neighborhood regression and screening. The global sketching algorithm is proposed to test the topological properties requiring exponential computation complexity, especially testing the chromatic number and the maximum clique. This algorithm infers the corresponding property based on the sampled subgraph. Our algorithms are shown to substantially accelerate the computation of existing methods. We validate our theory and method through both synthetic simulations and a real application in neuroscience.", "bibtex": "@inproceedings{NEURIPS2018_cb463625,\n author = {Sun, Wei and Lu, Junwei and Liu, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sketching Method for Large Scale Combinatorial Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cb463625fc9dde2d82207e15bde1b674-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cb463625fc9dde2d82207e15bde1b674-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cb463625fc9dde2d82207e15bde1b674-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cb463625fc9dde2d82207e15bde1b674-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cb463625fc9dde2d82207e15bde1b674-Reviews.html", "metareview": "", "pdf_size": 1823648, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14001446161977913807&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Management Science, University of Miami; Department of Biostatistics, Harvard University; Department of Computer Science, Northwestern University", "aff_domain": "bus.miami.edu;hsph.harvard.edu;northwestern.edu", "email": "bus.miami.edu;hsph.harvard.edu;northwestern.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cb463625fc9dde2d82207e15bde1b674-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Miami;Harvard University;Northwestern University", "aff_unique_dep": "Department of Management Science;Department of Biostatistics;Department of Computer Science", "aff_unique_url": "https://www.miami.edu;https://www.harvard.edu;https://www.northwestern.edu", "aff_unique_abbr": "UM;Harvard;NU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Coral Gables;Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Smoothed Analysis of Discrete Tensor Decomposition and Assemblies of Neurons", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12026", "id": "12026", "author_site": "Nima Anari, Constantinos Daskalakis, Wolfgang Maass, Christos Papadimitriou, Amin Saberi, Santosh Vempala", "author": "Nima Anari; Constantinos Daskalakis; Wolfgang Maass; Christos Papadimitriou; Amin Saberi; Santosh Vempala", "abstract": "We analyze linear independence of rank one tensors produced by tensor powers of randomly perturbed vectors. This enables efficient decomposition of sums of high-order tensors. Our analysis builds upon [BCMV14] but allows for a wider range of perturbation models, including discrete ones. We give an application to recovering assemblies of neurons.", "bibtex": "@inproceedings{NEURIPS2018_5cc3749a,\n author = {Anari, Nima and Daskalakis, Constantinos and Maass, Wolfgang and Papadimitriou, Christos and Saberi, Amin and Vempala, Santosh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Smoothed Analysis of Discrete Tensor Decomposition and Assemblies of Neurons},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5cc3749a6e56ef6d656735dff9176074-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5cc3749a6e56ef6d656735dff9176074-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5cc3749a6e56ef6d656735dff9176074-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5cc3749a6e56ef6d656735dff9176074-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5cc3749a6e56ef6d656735dff9176074-Reviews.html", "metareview": "", "pdf_size": 448364, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2659788399964537575&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Computer Science, Stanford University; EECS, MIT; Theoretical Computer Science, Graz University of Technology; Computer Science, Columbia University; MS&E, Stanford University; Computer Science, Georgia Tech", "aff_domain": "cs.stanford.edu;csail.mit.edu;igi.tugraz.at;cs.columbia.edu;stanford.edu;gatech.edu", "email": "cs.stanford.edu;csail.mit.edu;igi.tugraz.at;cs.columbia.edu;stanford.edu;gatech.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5cc3749a6e56ef6d656735dff9176074-Abstract.html", "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology;Graz University of Technology;Columbia University;Georgia Institute of Technology", "aff_unique_dep": "Computer Science;Electrical Engineering and Computer Science;Theoretical Computer Science;Computer Science;School of Computer Science", "aff_unique_url": "https://www.stanford.edu;https://www.mit.edu;https://www.tugraz.at;https://www.columbia.edu;https://www.gatech.edu", "aff_unique_abbr": "Stanford;MIT;TUGraz;Columbia;Georgia Tech", "aff_campus_unique_index": "0;1;2;0;4", "aff_campus_unique": "Stanford;Cambridge;Graz;;Atlanta", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Austria" }, { "title": "Smoothed analysis of the low-rank approach for smooth semidefinite programs", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11238", "id": "11238", "author_site": "Thomas Pumir, Samy Jelassi, Nicolas Boumal", "author": "Thomas Pumir; Samy Jelassi; Nicolas Boumal", "abstract": "We consider semidefinite programs (SDPs) of size $n$ with equality constraints. In order to overcome scalability issues, Burer and Monteiro proposed a factorized approach based on optimizing over a matrix $Y$ of size $n\\times k$ such that $X=YY^*$ is the SDP variable. The advantages of such formulation are twofold: the dimension of the optimization variable is reduced, and positive semidefiniteness is naturally enforced. However, optimization in $Y$ is non-convex. In prior work, it has been shown that, when the constraints on the factorized variable regularly define a smooth manifold, provided $k$ is large enough, for almost all cost matrices, all second-order stationary points (SOSPs) are optimal. Importantly, in practice, one can only compute points which approximately satisfy necessary optimality conditions, leading to the question: are such points also approximately optimal? 
To this end, and under similar assumptions, we use smoothed analysis to show that approximate SOSPs for a randomly perturbed objective function are approximate global optima, with $k$ scaling like the square root of the number of constraints (up to log factors). We particularize our results to an SDP relaxation of phase retrieval.", "bibtex": "@inproceedings{NEURIPS2018_a1d50185,\n author = {Pumir, Thomas and Jelassi, Samy and Boumal, Nicolas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Smoothed analysis of the low-rank approach for smooth semidefinite programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a1d50185e7426cbb0acad1e6ca74b9aa-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a1d50185e7426cbb0acad1e6ca74b9aa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a1d50185e7426cbb0acad1e6ca74b9aa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a1d50185e7426cbb0acad1e6ca74b9aa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a1d50185e7426cbb0acad1e6ca74b9aa-Reviews.html", "metareview": "", "pdf_size": 417913, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17027002843010679211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "ORFE Department, Princeton University; ORFE Department, Princeton University; Department of Mathematics, Princeton University", "aff_domain": "princeton.edu;princeton.edu;math.princeton.edu", "email": "princeton.edu;princeton.edu;math.princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a1d50185e7426cbb0acad1e6ca74b9aa-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "ORFE Department", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Princeton;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Snap ML: A Hierarchical Framework for Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11051", "id": "11051", "author_site": "Celestine D\u00fcnner, Thomas Parnell, Dimitrios Sarigiannis, Nikolas Ioannou, Andreea Anghel, Gummadi Ravi, Madhusudanan Kandasamy, Haralampos Pozidis", "author": "Celestine D\u00fcnner; Thomas Parnell; Dimitrios Sarigiannis; Nikolas Ioannou; Andreea Anghel; Gummadi Ravi; Madhusudanan Kandasamy; Haralampos Pozidis", "abstract": "We describe a new software framework for fast training of generalized linear models. The framework, named Snap Machine Learning (Snap ML), combines recent advances in machine learning systems and algorithms in a nested manner to reflect the hierarchical architecture of modern computing systems. We prove theoretically that such a hierarchical system can accelerate training in distributed environments where intra-node communication is cheaper than inter-node communication. Additionally, we provide a review of the implementation of Snap ML in terms of GPU acceleration, pipelining, communication patterns and software architecture, highlighting aspects that were critical for achieving high performance. 
We evaluate the performance of Snap ML in both single-node and multi-node environments, quantifying the benefit of the hierarchical scheme and the data streaming functionality, and comparing with other widely-used machine learning software frameworks. Finally, we present a logistic regression benchmark on the Criteo Terabyte Click Logs dataset and show that Snap ML achieves the same test loss an order of magnitude faster than any of the previously reported results, including those obtained using TensorFlow and scikit-learn.", "bibtex": "@inproceedings{NEURIPS2018_eecca5b6,\n author = {D\\\"{u}nner, Celestine and Parnell, Thomas and Sarigiannis, Dimitrios and Ioannou, Nikolas and Anghel, Andreea and Ravi, Gummadi and Kandasamy, Madhusudanan and Pozidis, Haralampos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Snap ML: A Hierarchical Framework for Machine Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/eecca5b6365d9607ee5a9d336962c534-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/eecca5b6365d9607ee5a9d336962c534-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/eecca5b6365d9607ee5a9d336962c534-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/eecca5b6365d9607ee5a9d336962c534-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/eecca5b6365d9607ee5a9d336962c534-Reviews.html", "metareview": "", "pdf_size": 878227, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12093832896304347899&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "IBM Research, Zurich, Switzerland; IBM Research, Zurich, Switzerland; IBM Research, Zurich, Switzerland; IBM Research, Zurich, Switzerland; IBM Research, Zurich, Switzerland; IBM Systems, Bangalore, India; IBM Systems, Bangalore, India; IBM Research, Zurich, Switzerland", "aff_domain": "zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;in.ibm.com;in.ibm.com;zurich.ibm.com", "email": "zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;zurich.ibm.com;in.ibm.com;in.ibm.com;zurich.ibm.com", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/eecca5b6365d9607ee5a9d336962c534-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "0;0;0;0;0;1;1;0", "aff_campus_unique": "Zurich;Bangalore", "aff_country_unique_index": "0;0;0;0;0;1;1;0", "aff_country_unique": "Switzerland;India" }, { "title": "Soft-Gated Warping-GAN for Pose-Guided Person Image Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11071", "id": "11071", "author_site": "Haoye Dong, Xiaodan Liang, Ke Gong, Hanjiang Lai, Jia Zhu, Jian Yin", "author": "Haoye Dong; Xiaodan Liang; Ke Gong; Hanjiang Lai; Jia Zhu; Jian Yin", "abstract": "Despite remarkable advances in image synthesis research, existing works often fail in manipulating images under the context of large geometric transformations. 
Synthesizing person images conditioned on arbitrary poses is one of the most representative examples where the generation quality largely relies on the capability of identifying and modeling arbitrary transformations on different body parts. Current generative models are often built on local convolutions and overlook the key challenges (e.g. heavy occlusions, different views or dramatic appearance changes) when distinct geometric changes happen for each part, caused by arbitrary pose manipulations. This paper aims to resolve these challenges induced by geometric variability and spatial displacements via a new Soft-Gated Warping Generative Adversarial Network (Warping-GAN), which is composed of two stages: 1) it first synthesizes a target part segmentation map given a target pose, which depicts the region-level spatial layouts for guiding image synthesis with higher-level structure constraints; 2) the Warping-GAN equipped with a soft-gated warping-block learns feature-level mapping to render textures from the original image into the generated segmentation map. Warping-GAN is capable of controlling different transformation degrees given distinct target poses. Moreover, the proposed warping-block is light-weight and flexible enough to be injected into any networks. Human perceptual studies and quantitative evaluations demonstrate the superiority of our Warping-GAN that significantly outperforms all existing methods on two large datasets.", "bibtex": "@inproceedings{NEURIPS2018_17000029,\n author = {Dong, Haoye and Liang, Xiaodan and Gong, Ke and Lai, Hanjiang and Zhu, Jia and Yin, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Soft-Gated Warping-GAN for Pose-Guided Person Image Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1700002963a49da13542e0726b7bb758-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1700002963a49da13542e0726b7bb758-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1700002963a49da13542e0726b7bb758-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1700002963a49da13542e0726b7bb758-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1700002963a49da13542e0726b7bb758-Reviews.html", "metareview": "", "pdf_size": 2452097, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13648330202082847411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Data and Computer Science, Sun Yat-sen University + Guangdong Key Laboratory of Big Data Analysis and Processing; School of Intelligent Systems Engineering, Sun Yat-sen University; School of Data and Computer Science, Sun Yat-sen University; School of Data and Computer Science, Sun Yat-sen University + Guangdong Key Laboratory of Big Data Analysis and Processing; School of Computer Science, South China Normal University; School of Data and Computer Science, Sun Yat-sen University + Guangdong Key Laboratory of Big Data Analysis and Processing", "aff_domain": "mail2.sysu.edu.cn;gmail.com;gmail.com;mail.sysu.edu.cn;m.scun.edu.cn;mail.sysu.edu.cn", "email": "mail2.sysu.edu.cn;gmail.com;gmail.com;mail.sysu.edu.cn;m.scun.edu.cn;mail.sysu.edu.cn", "github": "", "project": "", "author_num": 6, "oa": 
"https://papers.nips.cc/paper_files/paper/2018/hash/1700002963a49da13542e0726b7bb758-Abstract.html", "aff_unique_index": "0+1;0;0;0+1;2;0+1", "aff_unique_norm": "Sun Yat-sen University;Guangdong Key Laboratory of Big Data Analysis and Processing;South China Normal University", "aff_unique_dep": "School of Data and Computer Science;Big Data Analysis and Processing;School of Computer Science", "aff_unique_url": "http://www.sysu.edu.cn/;;http://www.scnu.edu.cn", "aff_unique_abbr": "SYSU;;SCNU", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;0;0+0;0;0+0", "aff_country_unique": "China" }, { "title": "Solving Large Sequential Games with the Excessive Gap Technique", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11108", "id": "11108", "author_site": "Christian Kroer, Gabriele Farina, Tuomas Sandholm", "author": "Christian Kroer; Gabriele Farina; Tuomas Sandholm", "abstract": "There has been tremendous recent progress on equilibrium-finding algorithms for zero-sum imperfect-information extensive-form games, but there has been a puzzling gap between theory and practice. First-order methods have significantly better theoretical convergence rates than any counterfactual-regret minimization (CFR) variant. Despite this, CFR variants have been favored in practice. Experiments with first-order methods have only been conducted on small- and medium-sized games because those methods are complicated to implement in this setting, and because CFR variants have been enhanced extensively for over a decade they perform well in practice. In this paper we show that a particular first-order method, a state-of-the-art variant of the excessive gap technique---instantiated with the dilated entropy distance function---can efficiently solve large real-world problems competitively with CFR and its variants. We show this on large endgames encountered by the Libratus poker AI, which recently beat top human poker specialist professionals at no-limit Texas hold'em. We show experimental results on our variant of the excessive gap technique as well as a prior version. We introduce a numerically friendly implementation of the smoothed best response computation associated with first-order methods for extensive-form game solving. We present, to our knowledge, the first GPU implementation of a first-order method for extensive-form games. We present comparisons of several excessive gap technique and CFR variants.", "bibtex": "@inproceedings{NEURIPS2018_e836d813,\n author = {Kroer, Christian and Farina, Gabriele and Sandholm, Tuomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Solving Large Sequential Games with the Excessive Gap Technique},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e836d813fd184325132fca8edcdfb40e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e836d813fd184325132fca8edcdfb40e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e836d813fd184325132fca8edcdfb40e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e836d813fd184325132fca8edcdfb40e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e836d813fd184325132fca8edcdfb40e-Reviews.html", "metareview": "", "pdf_size": 453021, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5096902728321208225&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science, Carnegie Mellon University; Department of Computer Science, Carnegie Mellon University; Department of Computer Science, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e836d813fd184325132fca8edcdfb40e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Solving Non-smooth Constrained Programs with Lower Complexity than $\\mathcal{O}(1/\\varepsilon)$: A Primal-Dual Homotopy Smoothing Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11397", "id": "11397", "author_site": "Xiaohan Wei, Hao Yu, Qing Ling, Michael Neely", "author": "Xiaohan Wei; Hao Yu; Qing Ling; Michael Neely", "abstract": "We propose a new primal-dual homotopy smoothing algorithm for a linearly constrained convex program, where neither the primal nor the dual function has to be smooth or strongly convex. The best known iteration complexity for solving such a non-smooth problem is $\\mathcal{O}(\\varepsilon^{-1})$. In this paper, \nwe show that by leveraging a local error bound condition on the dual function, the proposed algorithm can achieve a better primal convergence time of $\\mathcal{O}\\left(\\varepsilon^{-2/(2+\\beta)}\\log_2(\\varepsilon^{-1})\\right)$, where $\\beta\\in(0,1]$ is a local error bound parameter. \nAs an example application, we show that the distributed geometric median problem, which can be formulated as a constrained convex program, has its dual function non-smooth but satisfying the aforementioned local error bound condition with $\\beta=1/2$, therefore enjoying a convergence time of $\\mathcal{O}\\left(\\varepsilon^{-4/5}\\log_2(\\varepsilon^{-1})\\right)$. This result improves upon the $\\mathcal{O}(\\varepsilon^{-1})$ convergence time bound achieved by existing distributed optimization algorithms. Simulation experiments also demonstrate the performance of our proposed algorithm.", "bibtex": "@inproceedings{NEURIPS2018_1e4d3617,\n author = {Wei, Xiaohan and Yu, Hao and Ling, Qing and Neely, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Solving Non-smooth Constrained Programs with Lower Complexity than \\textbackslash mathcal\\lbrace O\\rbrace (1/\\textbackslash varepsilon): A Primal-Dual Homotopy Smoothing Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1e4d36177d71bbb3558e43af9577d70e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1e4d36177d71bbb3558e43af9577d70e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1e4d36177d71bbb3558e43af9577d70e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1e4d36177d71bbb3558e43af9577d70e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1e4d36177d71bbb3558e43af9577d70e-Reviews.html", "metareview": "", "pdf_size": 3040685, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1e4d36177d71bbb3558e43af9577d70e-Abstract.html" }, { "title": "Sparse Attentive Backtracking: Temporal Credit Assignment Through Reminding", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11734", "id": "11734", "author_site": "Nan Rosemary Ke, Anirudh Goyal, Olexa Bilaniuk, Jonathan Binas, Michael Mozer, Chris Pal, Yoshua Bengio", "author": "Nan Rosemary Ke; Anirudh Goyal ALIAS PARTH GOYAL; Olexa Bilaniuk; Jonathan Binas; Michael Mozer; Chris Pal; Yoshua Bengio", "abstract": "Learning long-term dependencies in extended temporal sequences requires credit assignment to events far back in the past. The most common method for training recurrent neural networks, back-propagation through time (BPTT), requires credit information to be propagated backwards through every single step of the forward computation, potentially over thousands or millions of time steps.\nThis becomes computationally expensive or even infeasible when used with long sequences. Importantly, biological brains are unlikely to perform such detailed reverse replay over very long sequences of internal states (consider days, months, or years.) However, humans are often reminded of past memories or mental states which are associated with the current mental state.\nWe consider the hypothesis that such memory associations between past and present could be used for credit assignment through arbitrarily long sequences, propagating the credit assigned to the current state to the associated past state. Based on this principle, we study a novel algorithm which only back-propagates through a few of these temporal skip connections, realized by a learned attention mechanism that associates current states with relevant past states. We demonstrate in experiments that our method matches or outperforms regular BPTT and truncated BPTT in tasks involving particularly long-term dependencies, but without requiring the biologically implausible backward replay through the whole history of states. 
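A minimal sketch of the sparse-skip-connection idea just described, assuming PyTorch and a list of stored hidden states: attention selects a few relevant past states, and only those retain their autograd history, so backpropagation touches a handful of temporal links rather than the whole sequence. The function and the hard top-k rule are illustrative simplifications, not the paper's exact mechanism.

```python
import torch

def sparse_attend(h_t, memory, k=3):
    """Attend over stored past hidden states, but keep autograd paths
    only through the top-k most relevant ones (sparse skip connections);
    all other states are detached, so BPTT never walks through them."""
    if not memory:
        return torch.zeros_like(h_t)
    M = torch.stack(memory)                       # (T, d) past states
    scores = M.detach() @ h_t                     # relevance, no grad to M
    k = min(k, len(memory))
    mask = torch.zeros(len(memory), dtype=torch.bool)
    mask[torch.topk(scores, k).indices] = True
    M_sparse = torch.where(mask.unsqueeze(1), M, M.detach())
    w = torch.softmax(scores, dim=0).unsqueeze(1)
    return (w * M_sparse).sum(dim=0)              # summary of the past
```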
Additionally, we demonstrate that the proposed method transfers to longer sequences significantly better than LSTMs trained with BPTT and LSTMs trained with full self-attention.", "bibtex": "@inproceedings{NEURIPS2018_e16e74a6,\n author = {Ke, Nan Rosemary and ALIAS PARTH GOYAL, Anirudh Goyal and Bilaniuk, Olexa and Binas, Jonathan and Mozer, Michael C and Pal, Chris and Bengio, Yoshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sparse Attentive Backtracking: Temporal Credit Assignment Through Reminding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e16e74a63567ecb44ade5c87002bb1d9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e16e74a63567ecb44ade5c87002bb1d9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e16e74a63567ecb44ade5c87002bb1d9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e16e74a63567ecb44ade5c87002bb1d9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e16e74a63567ecb44ade5c87002bb1d9-Reviews.html", "metareview": "", "pdf_size": 1060344, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13217836813694679871&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Mila, Universit\u00e9 de Montr\u00e9al + Mila, Polytechnique Montr\u00e9al; Mila, Universit\u00e9 de Montr\u00e9al; Mila, Universit\u00e9 de Montr\u00e9al; Mila, Universit\u00e9 de Montr\u00e9al; University of Colorado, Boulder; Mila, Universit\u00e9 de Montr\u00e9al + Mila, Polytechnique Montr\u00e9al + Element AI; Mila, Universit\u00e9 de Montr\u00e9al", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e16e74a63567ecb44ade5c87002bb1d9-Abstract.html", "aff_unique_index": "0+1;0;0;0;2;0+1+3;0", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Polytechnique Montr\u00e9al;University of Colorado;Element AI", "aff_unique_dep": "Mila;Mila;;", "aff_unique_url": "https://umontreal.ca;https://www.polymtl.ca;https://www.colorado.edu;https://www.elementai.com", "aff_unique_abbr": "UdeM;Polytechnique;CU;Element AI", "aff_campus_unique_index": "0+0;0;0;0;1;0+0;0", "aff_campus_unique": "Montr\u00e9al;Boulder;", "aff_country_unique_index": "0+0;0;0;0;1;0+0+0;0", "aff_country_unique": "Canada;United States" }, { "title": "Sparse DNNs with Improved Adversarial Robustness", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11050", "id": "11050", "author_site": "Yiwen Guo, Chao Zhang, Changshui Zhang, Yurong Chen", "author": "Yiwen Guo; Chao Zhang; Changshui Zhang; Yurong Chen", "abstract": "Deep neural networks (DNNs) are computationally/memory-intensive and vulnerable to adversarial attacks, making them prohibitive in some real-world applications. By converting dense models into sparse ones, pruning appears to be a promising solution to reducing the computation/memory cost. This paper studies classification models, especially DNN-based ones, to demonstrate that there exist intrinsic relationships between their sparsity and adversarial robustness. 
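One way to probe such a relationship empirically is to sweep sparsity levels and measure accuracy under attack. The sketch below, assuming PyTorch, uses one-shot global magnitude pruning and a one-step sign-gradient (FGSM, l-infinity) attack for brevity, whereas the paper's analysis concerns l2 attacks; both helper functions are illustrative, not the paper's procedure.

```python
import torch
import torch.nn.functional as F

def magnitude_prune(model, sparsity=0.9):
    """One-shot global pruning: zero the smallest-magnitude weights."""
    flat = torch.cat([p.detach().abs().flatten() for p in model.parameters()])
    thresh = torch.quantile(flat, sparsity)
    with torch.no_grad():
        for p in model.parameters():
            p.mul_((p.abs() >= thresh).float())

def fgsm_accuracy(model, x, y, eps=0.1):
    """Accuracy under a one-step sign-gradient (FGSM) perturbation."""
    x = x.clone().requires_grad_(True)
    F.cross_entropy(model(x), y).backward()
    x_adv = (x + eps * x.grad.sign()).detach()
    return (model(x_adv).argmax(dim=1) == y).float().mean().item()
```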
Our analyses reveal, both theoretically and empirically, that nonlinear DNN-based classifiers behave differently under $l_2$ attacks from some linear ones. We further demonstrate that an appropriately higher model sparsity implies better robustness of nonlinear DNNs, whereas over-sparsified models can be less able to resist adversarial examples.", "bibtex": "@inproceedings{NEURIPS2018_4c5bde74,\n author = {Guo, Yiwen and Zhang, Chao and Zhang, Changshui and Chen, Yurong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sparse DNNs with Improved Adversarial Robustness},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4c5bde74a8f110656874902f07378009-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bde74a8f110656874902f07378009-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bde74a8f110656874902f07378009-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bde74a8f110656874902f07378009-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4c5bde74a8f110656874902f07378009-Reviews.html", "metareview": "", "pdf_size": 1073522, "gs_citation": 204, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6928658166855434543&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Intel Labs China + Institute for Artificial Intelligence, Tsinghua University (THUAI), State Key Lab of Intelligent Technologies and Systems, Beijing National Research Center for Information Science and Technology (BNRis), Department of Automation, Tsinghua University; Academy for Advanced Interdisciplinary Studies, Center for Data Science, Peking University; Institute for Artificial Intelligence, Tsinghua University (THUAI), State Key Lab of Intelligent Technologies and Systems, Beijing National Research Center for Information Science and Technology (BNRis), Department of Automation, Tsinghua University; Intel Labs China", "aff_domain": "intel.com;intel.com;pku.edu.cn;mail.tsinghua.edu.cn", "email": "intel.com;intel.com;pku.edu.cn;mail.tsinghua.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4c5bde74a8f110656874902f07378009-Abstract.html", "aff_unique_index": "0+1;2;1;0", "aff_unique_norm": "Intel;Tsinghua University;Peking University", "aff_unique_dep": "Intel Labs;Institute for Artificial Intelligence;Center for Data Science", "aff_unique_url": "https://www.intel.cn;https://www.tsinghua.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "Intel;THU;PKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0+0;0;0;0", "aff_country_unique": "China" }, { "title": "Sparse PCA from Sparse Linear Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12034", "id": "12034", "author_site": "Guy Bresler, Sung Min Park, Madalina Persu", "author": "Guy Bresler; Sung Min Park; Madalina Persu", "abstract": "Sparse Principal Component Analysis (SPCA) and Sparse Linear Regression (SLR) have a wide range of applications and have attracted a tremendous amount of attention in the last two decades as canonical examples of statistical problems in high dimension. 
A variety of algorithms have been proposed for both SPCA and SLR, but an explicit connection between the two had not been made. We show how to efficiently transform a black-box solver for SLR into an algorithm for SPCA: assuming the SLR solver satisfies prediction error guarantees achieved by existing efficient algorithms such as those based on the Lasso, the SPCA algorithm derived from it achieves near-state-of-the-art guarantees for testing and for support recovery for the single spiked covariance model as obtained by the current best polynomial-time algorithms. Our reduction not only highlights the inherent similarity between the two problems, but also, from a practical standpoint, allows one to obtain a collection of algorithms for SPCA directly from known algorithms for SLR. We provide experimental results on simulated data comparing our proposed framework to other algorithms for SPCA.", "bibtex": "@inproceedings{NEURIPS2018_74f23f9e,\n author = {Bresler, Guy and Park, Sung Min and Persu, Madalina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sparse PCA from Sparse Linear Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/74f23f9e28cbc5ddaae8582f48642a59-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/74f23f9e28cbc5ddaae8582f48642a59-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/74f23f9e28cbc5ddaae8582f48642a59-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/74f23f9e28cbc5ddaae8582f48642a59-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/74f23f9e28cbc5ddaae8582f48642a59-Reviews.html", "metareview": "", "pdf_size": 743930, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3595351041092320646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "MIT; MIT; Two Sigma+MIT", "aff_domain": "mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/74f23f9e28cbc5ddaae8582f48642a59-Abstract.html", "aff_unique_index": "0;0;1+0", "aff_unique_norm": "Massachusetts Institute of Technology;Two Sigma", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.twosigma.com", "aff_unique_abbr": "MIT;Two Sigma", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Sparsified SGD with Memory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11439", "id": "11439", "author_site": "Sebastian Stich, Jean-Baptiste Cordonnier, Martin Jaggi", "author": "Sebastian U Stich; Jean-Baptiste Cordonnier; Martin Jaggi", "abstract": "Huge scale machine learning problems are nowadays tackled by distributed optimization algorithms, i.e. algorithms that leverage the compute power of many devices for training. The communication overhead is a key bottleneck that hinders perfect scalability. Various recent works proposed to use quantization or sparsification techniques to reduce the amount of data that needs to be communicated, for instance by only sending the most significant entries of the stochastic gradient (top-k sparsification). 
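The top-k scheme lends itself to a compact sketch. The version below adds the error-feedback memory suggested by the paper's title: coordinates that are not transmitted are accumulated locally and re-injected into later gradients. The function name and interface are assumptions for illustration.

```python
import numpy as np

def topk_with_memory(grad, memory, k):
    """Error-feedback sketch: add the residual memory to the fresh
    gradient, transmit only the k largest entries, keep the rest."""
    corrected = grad + memory
    idx = np.argsort(np.abs(corrected))[-k:]   # top-k coordinates
    sparse = np.zeros_like(corrected)
    sparse[idx] = corrected[idx]               # communicated update
    new_memory = corrected - sparse            # residual kept locally
    return sparse, new_memory
```

Each worker would call this once per step, apply `sparse` to the shared model, and carry `new_memory` forward so that no gradient information is permanently discarded.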
Whilst such schemes showed very promising performance in practice, they have eluded theoretical analysis so far.", "bibtex": "@inproceedings{NEURIPS2018_b440509a,\n author = {Stich, Sebastian U and Cordonnier, Jean-Baptiste and Jaggi, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sparsified SGD with Memory},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b440509a0106086a67bc2ea9df0a1dab-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b440509a0106086a67bc2ea9df0a1dab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b440509a0106086a67bc2ea9df0a1dab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b440509a0106086a67bc2ea9df0a1dab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b440509a0106086a67bc2ea9df0a1dab-Reviews.html", "metareview": "", "pdf_size": 606183, "gs_citation": 1007, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6832257024596167334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b440509a0106086a67bc2ea9df0a1dab-Abstract.html" }, { "title": "Speaker-Follower Models for Vision-and-Language Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11334", "id": "11334", "author_site": "Daniel Fried, Ronghang Hu, Volkan Cirik, Anna Rohrbach, Jacob Andreas, Louis-Philippe Morency, Taylor Berg-Kirkpatrick, Kate Saenko, Dan Klein, Trevor Darrell", "author": "Daniel Fried; Ronghang Hu; Volkan Cirik; Anna Rohrbach; Jacob Andreas; Louis-Philippe Morency; Taylor Berg-Kirkpatrick; Kate Saenko; Dan Klein; Trevor Darrell", "abstract": "Navigation guided by natural language instructions presents a challenging reasoning problem for instruction followers. Natural language instructions typically identify only a few high-level decisions and landmarks rather than complete low-level motor behaviors; much of the missing information must be inferred based on perceptual context. In machine learning settings, this is doubly challenging: it is difficult to collect enough annotated data to enable learning of this reasoning process from scratch, and also difficult to implement the reasoning process using generic sequence models. Here we describe an approach to vision-and-language navigation that addresses both these issues with an embedded speaker model. We use this speaker model to (1) synthesize new instructions for data augmentation and to (2) implement pragmatic reasoning, which evaluates how well candidate action sequences explain an instruction. Both steps are supported by a panoramic action space that reflects the granularity of human-generated instructions. 
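The pragmatic-reasoning step described above can be summarized as reranking: candidate action sequences are scored by how well the speaker model explains the instruction, blended with the follower's own confidence. A minimal sketch, assuming `follower` and `speaker` are callables returning log-probabilities and that `lam` trades off the two terms; all names are illustrative.

```python
def pragmatic_rerank(instruction, candidate_routes, follower, speaker, lam=0.5):
    """Pick the route the follower likes and that the speaker would
    most plausibly describe with the given instruction."""
    def score(route):
        return ((1 - lam) * follower(route, instruction)
                + lam * speaker(instruction, route))
    return max(candidate_routes, key=score)
```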
Experiments show that all three components of this approach---speaker-driven data augmentation, pragmatic reasoning and panoramic action space---dramatically improve the performance of a baseline instruction follower, more than doubling the success rate over the best existing approach on a standard benchmark.", "bibtex": "@inproceedings{NEURIPS2018_6a81681a,\n author = {Fried, Daniel and Hu, Ronghang and Cirik, Volkan and Rohrbach, Anna and Andreas, Jacob and Morency, Louis-Philippe and Berg-Kirkpatrick, Taylor and Saenko, Kate and Klein, Dan and Darrell, Trevor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Speaker-Follower Models for Vision-and-Language Navigation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a81681a7af700c6385d36577ebec359-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a81681a7af700c6385d36577ebec359-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6a81681a7af700c6385d36577ebec359-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a81681a7af700c6385d36577ebec359-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a81681a7af700c6385d36577ebec359-Reviews.html", "metareview": "", "pdf_size": 2991332, "gs_citation": 589, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4902418707565650524&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of California, Berkeley; University of California, Berkeley; Carnegie Mellon University; University of California, Berkeley; University of California, Berkeley; Carnegie Mellon University; Carnegie Mellon University; Boston University; University of California, Berkeley; University of California, Berkeley", "aff_domain": "; ; ; ; ; ; ; ; ; ", "email": "; ; ; ; ; ; ; ; ; ", "github": "", "project": "", "author_num": 10, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a81681a7af700c6385d36577ebec359-Abstract.html", "aff_unique_index": "0;0;1;0;0;1;1;2;0;0", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University;Boston University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu;https://www.bu.edu", "aff_unique_abbr": "UC Berkeley;CMU;BU", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Spectral Filtering for General Linear Dynamical Systems", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11456", "id": "11456", "author_site": "Elad Hazan, Holden Lee, Karan Singh, Cyril Zhang, Yi Zhang", "author": "Elad Hazan; Holden Lee; Karan Singh; Cyril Zhang; Yi Zhang", "abstract": "We give a polynomial-time algorithm for learning latent-state linear dynamical systems without system identification, and without assumptions on the spectral radius of the system's transition matrix. 
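For context, the spectral filtering technique this work builds on regresses outputs onto inputs convolved with fixed filters, the top eigenvectors of a particular Hankel matrix. Below is a sketch of those features, assuming the symmetric-case construction Z_ij = 2/((i+j)^3 - (i+j)) from the earlier spectral filtering work, and omitting the eigenvalue scaling and the phase handling that this paper adds.

```python
import numpy as np

def spectral_filters(T, k):
    """Top-k eigenvectors of the Hankel matrix Z_ij = 2/((i+j)^3-(i+j))
    (1-indexed); these serve as fixed filters over the input history."""
    s = np.arange(1, T + 1)
    S = s[:, None] + s[None, :]
    Z = 2.0 / (S ** 3 - S)
    _, vecs = np.linalg.eigh(Z)        # eigenvalues in ascending order
    return vecs[:, -k:]                # (T, k)

def featurize(u_hist, filters):
    """Project the last T inputs onto each spectral filter."""
    return u_hist @ filters            # (T,) -> (k,) regression features
```

Prediction then reduces to an ordinary least-squares fit of the observed outputs onto these filtered features, which is what makes the method convex and polynomial-time.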
The algorithm extends the recently introduced technique of spectral filtering, previously applied only to systems with a symmetric transition matrix, using a novel convex relaxation to allow for the efficient identification of phases.", "bibtex": "@inproceedings{NEURIPS2018_d6288499,\n author = {Hazan, Elad and Lee, Holden and Singh, Karan and Zhang, Cyril and Zhang, Yi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Spectral Filtering for General Linear Dynamical Systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d6288499d0083cc34e60a077b7c4b3e1-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d6288499d0083cc34e60a077b7c4b3e1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d6288499d0083cc34e60a077b7c4b3e1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d6288499d0083cc34e60a077b7c4b3e1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d6288499d0083cc34e60a077b7c4b3e1-Reviews.html", "metareview": "", "pdf_size": 576689, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4211024417556102361&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Princeton University + Google AI Princeton; Princeton University; Princeton University + Google AI Princeton; Princeton University + Google AI Princeton; Princeton University + Google AI Princeton", "aff_domain": "cs.princeton.edu;princeton.edu;cs.princeton.edu;cs.princeton.edu;cs.princeton.edu", "email": "cs.princeton.edu;princeton.edu;cs.princeton.edu;cs.princeton.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d6288499d0083cc34e60a077b7c4b3e1-Abstract.html", "aff_unique_index": "0+1;0;0+1;0+1;0+1", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google AI", "aff_unique_url": "https://www.princeton.edu;https://ai.google", "aff_unique_abbr": "Princeton;Google AI", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Princeton", "aff_country_unique_index": "0+0;0;0+0;0+0;0+0", "aff_country_unique": "United States" }, { "title": "Spectral Signatures in Backdoor Attacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11767", "id": "11767", "author_site": "Brandon Tran, Jerry Li, Aleksander Madry", "author": "Brandon Tran; Jerry Li; Aleksander Madry", "abstract": "A recent line of work has uncovered a new form of data poisoning: so-called backdoor attacks. These attacks are particularly dangerous because they do not affect a network's behavior on typical, benign data. Rather, the network only deviates from its expected output when triggered by an adversary's planted perturbation.", "bibtex": "@inproceedings{NEURIPS2018_280cf18b,\n author = {Tran, Brandon and Li, Jerry and Madry, Aleksander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Spectral Signatures in Backdoor Attacks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/280cf18baf4311c92aa5a042336587d3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/280cf18baf4311c92aa5a042336587d3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/280cf18baf4311c92aa5a042336587d3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/280cf18baf4311c92aa5a042336587d3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/280cf18baf4311c92aa5a042336587d3-Reviews.html", "metareview": "", "pdf_size": 388132, "gs_citation": 1001, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16709034443112142703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "EECS, MIT; Simons Institute; EECS, MIT", "aff_domain": "mit.edu;berkeley.edu;mit.edu", "email": "mit.edu;berkeley.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/280cf18baf4311c92aa5a042336587d3-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Simons Institute for the Theory of Computing", "aff_unique_dep": "Electrical Engineering and Computer Science;", "aff_unique_url": "https://www.mit.edu;https://simons.berkeley.edu", "aff_unique_abbr": "MIT;SITC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "SplineNets: Continuous Neural Decision Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11212", "id": "11212", "author_site": "Cem Keskin, Shahram Izadi", "author": "Cem Keskin; Shahram Izadi", "abstract": "We present SplineNets, a practical and novel approach for using conditioning in convolutional neural networks (CNNs). SplineNets are continuous generalizations of neural decision graphs, and they can dramatically reduce runtime complexity and computation costs of CNNs, while maintaining or even increasing accuracy. Functions of SplineNets are both dynamic (i.e., conditioned on the input) and hierarchical (i.e.,conditioned on the computational path). SplineNets employ a unified loss function with a desired level of smoothness over both the network and decision parameters, while allowing for sparse activation of a subset of nodes for individual samples. In particular, we embed infinitely many function weights (e.g. filters) on smooth, low dimensional manifolds parameterized by compact B-splines, which are indexed by a position parameter. Instead of sampling from a categorical distribution to pick a branch, samples choose a continuous position to pick a function weight. We further show that by maximizing the mutual information between spline positions and class labels, the network can be optimally utilized and specialized for classification tasks. Experiments show that our approach can significantly increase the accuracy of ResNets with negligible cost in speed, matching the precision of a 110 level ResNet with a 32 level SplineNet.", "bibtex": "@inproceedings{NEURIPS2018_9246444d,\n author = {Keskin, Cem and Izadi, Shahram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SplineNets: Continuous Neural Decision Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9246444d94f081e3549803b928260f56-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9246444d94f081e3549803b928260f56-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9246444d94f081e3549803b928260f56-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9246444d94f081e3549803b928260f56-Reviews.html", "metareview": "", "pdf_size": 2987575, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1220798502070083348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Google; Google", "aff_domain": "google.com;google.com", "email": "google.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9246444d94f081e3549803b928260f56-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stacked Semantics-Guided Attention Model for Fine-Grained Zero-Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11582", "id": "11582", "author_site": "yunlong yu, Zhong Ji, Yanwei Fu, Jichang Guo, Yanwei Pang, Zhongfei (Mark) Zhang", "author": "yunlong yu; Zhong Ji; Yanwei Fu; Jichang Guo; Yanwei Pang; Zhongfei (Mark) Zhang", "abstract": "Zero-Shot Learning (ZSL) is generally achieved via aligning the semantic relationships between the visual features and the corresponding class semantic descriptions. However, using the global features to represent fine-grained images may lead to sub-optimal results since they neglect the discriminative differences of local regions. Besides, different regions contain distinct discriminative information. The important regions should contribute more to the prediction. To this end, we propose a novel stacked semantics-guided attention (S2GA) model to obtain semantic relevant features by using individual class semantic features to progressively guide the visual features to generate an attention map for weighting the importance of different local regions. Feeding both the integrated visual features and the class semantic features into a multi-class classification architecture, the proposed framework can be trained end-to-end. Extensive experimental results on CUB and NABird datasets show that the proposed approach has a consistent improvement on both fine-grained zero-shot classification and retrieval tasks.", "bibtex": "@inproceedings{NEURIPS2018_9087b0ef,\n author = {yu, yunlong and Ji, Zhong and Fu, Yanwei and Guo, Jichang and Pang, Yanwei and Zhang, Zhongfei (Mark)},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stacked Semantics-Guided Attention Model for Fine-Grained Zero-Shot Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9087b0efc7c7acd1ef7e153678809c77-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9087b0efc7c7acd1ef7e153678809c77-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9087b0efc7c7acd1ef7e153678809c77-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9087b0efc7c7acd1ef7e153678809c77-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9087b0efc7c7acd1ef7e153678809c77-Reviews.html", "metareview": "", "pdf_size": 1980737, "gs_citation": 156, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17870706793172229300&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "School of Electrical and Information Engineering, Tianjin University; School of Electrical and Information Engineering, Tianjin University; School of Data Science, Fudan University + AITRICS; School of Electrical and Information Engineering, Tianjin University; School of Electrical and Information Engineering, Tianjin University; Computer Science Department, Binghamton University", "aff_domain": "tju.edu.cn;tju.edu.cn;fudan.edu.cn;tju.edu.cn;tju.edu.cn;cs.binghamton.edu", "email": "tju.edu.cn;tju.edu.cn;fudan.edu.cn;tju.edu.cn;tju.edu.cn;cs.binghamton.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9087b0efc7c7acd1ef7e153678809c77-Abstract.html", "aff_unique_index": "0;0;1+2;0;0;3", "aff_unique_norm": "Tianjin University;Fudan University;AITRICS;Binghamton University", "aff_unique_dep": "School of Electrical and Information Engineering;School of Data Science;;Computer Science Department", "aff_unique_url": "http://www.tju.edu.cn;https://www.fudan.edu.cn;https://www.aitrics.com;https://www.binghamton.edu", "aff_unique_abbr": "Tianjin University;Fudan;AITRICS;Binghamton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1;0;0;2", "aff_country_unique": "China;South Korea;United States" }, { "title": "Statistical Optimality of Stochastic Gradient Descent on Hard Learning Problems through Multiple Passes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11777", "id": "11777", "author_site": "Loucas Pillaud-Vivien, Alessandro Rudi, Francis Bach", "author": "Loucas Pillaud-Vivien; Alessandro Rudi; Francis Bach", "abstract": "We consider stochastic gradient descent (SGD) for least-squares regression with potentially several passes over the data. While several passes have been widely reported to perform practically better in terms of predictive performance on unseen data, the existing theoretical analysis of SGD suggests that a single pass is statistically optimal. While this is true for low-dimensional easy problems, we show that for hard problems, multiple passes lead to statistically optimal predictions while a single pass does not; we also show that in these hard models, the optimal number of passes over the data increases with sample size. 
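The single-pass versus multi-pass question is easy to probe numerically. Below is a toy sketch, assuming plain single-sample SGD on least squares; comparing held-out error after one pass against many passes on a problem with a slowly decaying covariance spectrum mimics the kind of experiment described. Names and step sizes are illustrative.

```python
import numpy as np

def sgd_passes(X, y, passes, lr=0.05, seed=0):
    """Single-sample SGD for least squares, cycling over the data a
    fixed number of times; more passes mean more optimization."""
    rng = np.random.default_rng(seed)
    n, d = X.shape
    w = np.zeros(d)
    for _ in range(passes):
        for i in rng.permutation(n):
            w -= lr * (X[i] @ w - y[i]) * X[i]
    return w

# e.g. compare held-out error of sgd_passes(X, y, 1) vs sgd_passes(X, y, 20)
```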
In order to define the notion of hardness and show that our predictive performances are optimal, we consider potentially infinite-dimensional models and notions typically associated with kernel methods, namely, the decay of eigenvalues of the covariance matrix of the features and the complexity of the optimal predictor as measured through the covariance matrix.\nWe illustrate our results on synthetic experiments with non-linear kernel methods and on a classical benchmark with a linear model.", "bibtex": "@inproceedings{NEURIPS2018_10ff0b5e,\n author = {Pillaud-Vivien, Loucas and Rudi, Alessandro and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Statistical Optimality of Stochastic Gradient Descent on Hard Learning Problems through Multiple Passes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/10ff0b5e85e5b85cc3095d431d8c08b4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/10ff0b5e85e5b85cc3095d431d8c08b4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/10ff0b5e85e5b85cc3095d431d8c08b4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/10ff0b5e85e5b85cc3095d431d8c08b4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/10ff0b5e85e5b85cc3095d431d8c08b4-Reviews.html", "metareview": "", "pdf_size": 1360456, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=425601323285057286&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "INRIA - Ecole Normale Sup\u00e9rieure; INRIA - Ecole Normale Sup\u00e9rieure; INRIA - Ecole Normale Sup\u00e9rieure", "aff_domain": "inria.fr;inria.fr;inria.fr", "email": "inria.fr;inria.fr;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/10ff0b5e85e5b85cc3095d431d8c08b4-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "Ecole Normale Sup\u00e9rieure", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Statistical and Computational Trade-Offs in Kernel K-Means", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11891", "id": "11891", "author_site": "Daniele Calandriello, Lorenzo Rosasco", "author": "Daniele Calandriello; Lorenzo Rosasco", "abstract": "We investigate the efficiency of k-means in terms of both statistical and computational requirements.\nMore precisely, we study a Nystr\\\"om approach to kernel k-means. We analyze the statistical properties of the proposed method and show that it achieves the same accuracy as exact kernel k-means with only a fraction of computations.\nIndeed, we prove under basic assumptions that sampling $\\sqrt{n}$ Nystr\\\"om landmarks allows one to greatly reduce computational costs without incurring any loss of accuracy. To the best of our knowledge, this is the first result of this kind for unsupervised learning.", "bibtex": "@inproceedings{NEURIPS2018_18903e44,\n author = {Calandriello, Daniele and Rosasco, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Statistical and Computational Trade-Offs in Kernel K-Means},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/18903e4430783a191b0cfab439daaef8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/18903e4430783a191b0cfab439daaef8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/18903e4430783a191b0cfab439daaef8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/18903e4430783a191b0cfab439daaef8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/18903e4430783a191b0cfab439daaef8-Reviews.html", "metareview": "", "pdf_size": 472997, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18297727446085201696&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "LCSL \u2013 IIT & MIT, Genoa, Italy; University of Genoa, LCSL \u2013 IIT & MIT", "aff_domain": "; ", "email": "; ", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/18903e4430783a191b0cfab439daaef8-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Istituto Italiano di Tecnologia (IIT);University of Genoa", "aff_unique_dep": "LCSL;LCSL \u2013 IIT", "aff_unique_url": "https://www.iit.it;https://www.unige.it", "aff_unique_abbr": "IIT;UniGe", "aff_campus_unique_index": "0", "aff_campus_unique": "Genoa;", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "Statistical mechanics of low-rank tensor decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11785", "id": "11785", "author_site": "Jonathan Kadmon, Surya Ganguli", "author": "Jonathan Kadmon; Surya Ganguli", "abstract": "Often, large, high dimensional datasets collected across multiple\nmodalities can be organized as a higher order tensor. Low-rank tensor\ndecomposition then arises as a powerful and widely used tool to discover\nsimple low dimensional structures underlying such data. However, we\ncurrently lack a theoretical understanding of the algorithmic behavior\nof low-rank tensor decompositions. We derive Bayesian approximate\nmessage passing (AMP) algorithms for recovering arbitrarily shaped\nlow-rank tensors buried within noise, and we employ dynamic mean field\ntheory to precisely characterize their performance. Our theory reveals\nthe existence of phase transitions between easy, hard and impossible\ninference regimes, and displays an excellent match with simulations.\nMoreover, it reveals several qualitative surprises compared to the\nbehavior of symmetric, cubic tensor decomposition. Finally, we compare\nour AMP algorithm to the most commonly used algorithm, alternating\nleast squares (ALS), and demonstrate that AMP significantly outperforms\nALS in the presence of noise.", "bibtex": "@inproceedings{NEURIPS2018_b3848d61,\n author = {Kadmon, Jonathan and Ganguli, Surya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Statistical mechanics of low-rank tensor decomposition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b3848d61bbbc6207c6668a8a9e2730ed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b3848d61bbbc6207c6668a8a9e2730ed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b3848d61bbbc6207c6668a8a9e2730ed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b3848d61bbbc6207c6668a8a9e2730ed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b3848d61bbbc6207c6668a8a9e2730ed-Reviews.html", "metareview": "", "pdf_size": 677758, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9594213569092054865&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Applied Physics, Stanford University; Department of Applied Physics, Stanford University + Google Brain, Mountain View, CA", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b3848d61bbbc6207c6668a8a9e2730ed-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": "Department of Applied Physics;Google Brain", "aff_unique_url": "https://www.stanford.edu;https://brain.google.com", "aff_unique_abbr": "Stanford;Google Brain", "aff_campus_unique_index": "0;0+1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United States" }, { "title": "Stein Variational Gradient Descent as Moment Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11845", "id": "11845", "author_site": "Qiang Liu, Dilin Wang", "author": "Qiang Liu; Dilin Wang", "abstract": "Stein variational gradient descent (SVGD) is a non-parametric inference algorithm that evolves a set of particles to fit a given distribution of interest. We analyze the non-asymptotic properties of SVGD, showing that there exists a set of functions, which we call the Stein matching set, whose expectations are exactly estimated by any set of particles that satisfies the fixed point equation of SVGD. This set is the image of Stein operator applied on the feature maps of the positive definite kernel used in SVGD. Our results provide a theoretical framework for analyzing the properties of SVGD with different kernels, shedding insight into optimal kernel choice. In particular, we show that SVGD with linear kernels yields exact estimation of means and variances on Gaussian distributions, while random Fourier features enable probabilistic bounds for distributional approximation. Our results offer a refreshing view of the classical inference problem as fitting Stein\u2019s identity or solving the Stein equation, which may motivate more efficient algorithms.", "bibtex": "@inproceedings{NEURIPS2018_125b93c9,\n author = {Liu, Qiang and Wang, Dilin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stein Variational Gradient Descent as Moment Matching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/125b93c9b50703fe9dac43ec231f5f83-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/125b93c9b50703fe9dac43ec231f5f83-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/125b93c9b50703fe9dac43ec231f5f83-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/125b93c9b50703fe9dac43ec231f5f83-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/125b93c9b50703fe9dac43ec231f5f83-Reviews.html", "metareview": "", "pdf_size": 322908, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10258782107332088213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/125b93c9b50703fe9dac43ec231f5f83-Abstract.html" }, { "title": "Step Size Matters in Deep Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11345", "id": "11345", "author_site": "Kamil Nar, Shankar Sastry", "author": "Kamil Nar; Shankar Sastry", "abstract": "Training a neural network with the gradient descent algorithm gives rise to a discrete-time nonlinear dynamical system. Consequently, behaviors that are typically observed in these systems emerge during training, such as convergence to an orbit but not to a fixed point or dependence of convergence on the initialization. Step size of the algorithm plays a critical role in these behaviors: it determines the subset of the local optima that the algorithm can converge to, and it specifies the magnitude of the oscillations if the algorithm converges to an orbit. To elucidate the effects of the step size on training of neural networks, we study the gradient descent algorithm as a discrete-time dynamical system, and by analyzing the Lyapunov stability of different solutions, we show the relationship between the step size of the algorithm and the solutions that can be obtained with this algorithm. The results provide an explanation for several phenomena observed in practice, including the deterioration in the training error with increased depth, the hardness of estimating linear mappings with large singular values, and the distinct performance of deep residual networks.", "bibtex": "@inproceedings{NEURIPS2018_e8fd4a8a,\n author = {Nar, Kamil and Sastry, Shankar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Step Size Matters in Deep Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e8fd4a8a5bab2b3785d794ab51fef55c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e8fd4a8a5bab2b3785d794ab51fef55c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e8fd4a8a5bab2b3785d794ab51fef55c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e8fd4a8a5bab2b3785d794ab51fef55c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e8fd4a8a5bab2b3785d794ab51fef55c-Reviews.html", "metareview": "", "pdf_size": 522872, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5460214845816514152&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Electrical Engineering and Computer Sciences, University of California, Berkeley; Electrical Engineering and Computer Sciences, University of California, Berkeley", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e8fd4a8a5bab2b3785d794ab51fef55c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stimulus domain transfer in recurrent models for large scale cortical population prediction on video", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11693", "id": "11693", "author_site": "Fabian Sinz, Alexander Ecker, Paul Fahey, Edgar Walker, Erick M Cobos, Emmanouil Froudarakis, Dimitri Yatsenko, Xaq Pitkow, Jacob Reimer, Andreas Tolias", "author": "Fabian Sinz; Alexander S Ecker; Paul Fahey; Edgar Walker; Erick Cobos; Emmanouil Froudarakis; Dimitri Yatsenko; Zachary Pitkow; Jacob Reimer; Andreas Tolias", "abstract": "To better understand the representations in visual cortex, we need to generate better predictions of neural activity in awake animals presented with their ecological input: natural video. Despite recent advances in models for static images, models for predicting responses to natural video are scarce and standard linear-nonlinear models perform poorly. We developed a new deep recurrent network architecture that predicts inferred spiking activity of thousands of mouse V1 neurons simultaneously recorded with two-photon microscopy, while accounting for confounding factors such as the animal's gaze position and brain state changes related to running state and pupil dilation. Powerful system identification models provide an opportunity to gain insight into cortical functions through in silico experiments that can subsequently be tested in the brain. However, in many cases this approach requires that the model is able to generalize to stimulus statistics that it was not trained on, such as band-limited noise and other parameterized stimuli. We investigated these domain transfer properties in our model and found that our model trained on natural images is able to correctly predict the orientation tuning of neurons in response to artificial noise stimuli. 
Finally, we show that we can fully generalize from movies to noise and maintain high predictive performance on both stimulus domains by fine-tuning only the final layer's weights on a network otherwise trained on natural movies. The converse, however, is not true.", "bibtex": "@inproceedings{NEURIPS2018_9d684c58,\n author = {Sinz, Fabian and Ecker, Alexander S and Fahey, Paul and Walker, Edgar and Cobos, Erick and Froudarakis, Emmanouil and Yatsenko, Dimitri and Pitkow, Zachary and Reimer, Jacob and Tolias, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stimulus domain transfer in recurrent models for large scale cortical population prediction on video},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9d684c589d67031a627ad33d59db65e5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9d684c589d67031a627ad33d59db65e5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9d684c589d67031a627ad33d59db65e5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9d684c589d67031a627ad33d59db65e5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9d684c589d67031a627ad33d59db65e5-Reviews.html", "metareview": "", "pdf_size": 5248574, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3426947555786993703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;;;;;;;", "aff_domain": ";;;;;;;;;", "email": ";;;;;;;;;", "github": "", "project": "", "author_num": 10, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9d684c589d67031a627ad33d59db65e5-Abstract.html" }, { "title": "Stochastic Chebyshev Gradient Descent for Spectral Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11711", "id": "11711", "author_site": "Insu Han, Haim Avron, Jinwoo Shin", "author": "Insu Han; Haim Avron; Jinwoo Shin", "abstract": "A large class of machine learning techniques requires the solution of optimization problems involving spectral functions of parametric matrices, e.g., log-determinant and nuclear norm. Unfortunately, computing the gradient of a spectral function is generally of cubic complexity; as such, gradient descent methods are rather expensive for optimizing objectives involving the spectral function. Thus, one naturally turns to stochastic gradient methods in the hope that they will provide a way to reduce or altogether avoid the computation of full gradients. However, here a new challenge appears: there is no straightforward way to compute unbiased stochastic gradients for spectral functions. In this paper, we develop unbiased stochastic gradients for spectral-sums, an important subclass of spectral functions. Our unbiased stochastic gradients are based on combining randomized trace estimators with stochastic truncation of the Chebyshev expansions. A careful design of the truncation distribution allows us to offer distributions that are variance-optimal, which is crucial for fast and stable convergence of stochastic gradient methods. We further leverage our proposed stochastic gradients to devise stochastic methods for objective functions involving spectral-sums, and rigorously analyze their convergence rate. 
The utility of our methods is demonstrated in numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_898aef09,\n author = {Han, Insu and Avron, Haim and Shin, Jinwoo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Chebyshev Gradient Descent for Spectral Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/898aef0932f6aaecda27aba8e9903991-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/898aef0932f6aaecda27aba8e9903991-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/898aef0932f6aaecda27aba8e9903991-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/898aef0932f6aaecda27aba8e9903991-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/898aef0932f6aaecda27aba8e9903991-Reviews.html", "metareview": "", "pdf_size": 484022, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=131108035883764903&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "School of Electrical Engineering, Korea Advanced Institute of Science and Technology; Department of Applied Mathematics, Tel Aviv University; School of Electrical Engineering, Korea Advanced Institute of Science and Technology + AItrics", "aff_domain": "kaist.ac.kr;post.tau.ac.il;kaist.ac.kr", "email": "kaist.ac.kr;post.tau.ac.il;kaist.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/898aef0932f6aaecda27aba8e9903991-Abstract.html", "aff_unique_index": "0;1;0+2", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Tel Aviv University;AITRICS", "aff_unique_dep": "School of Electrical Engineering;Department of Applied Mathematics;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.tau.ac.il;", "aff_unique_abbr": "KAIST;TAU;", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Tel Aviv", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;Israel;" }, { "title": "Stochastic Composite Mirror Descent: Optimal Bounds with High Probabilities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11167", "id": "11167", "author_site": "Yunwen Lei, Ke Tang", "author": "Yunwen Lei; Ke Tang", "abstract": "We study stochastic composite mirror descent, a class of scalable algorithms able to exploit the geometry and composite structure of a problem. We consider both convex and strongly convex objectives with non-smooth loss functions, for each of which we establish high-probability convergence rates optimal up to a logarithmic factor. We apply the derived computational error bounds to study the generalization performance of multi-pass stochastic gradient descent (SGD) in a non-parametric setting. Our high-probability generalization bounds enjoy a logarithmic dependency on the number of passes provided that the step size sequence is square-summable, which improves the existing in-expectation bounds with their polynomial dependency, and therefore gives a strong justification for the ability of multi-pass SGD to overcome overfitting. Our analysis removes boundedness assumptions on subgradients often imposed in the literature. 
Numerical results are reported to support our theoretical findings.", "bibtex": "@inproceedings{NEURIPS2018_8c6744c9,\n author = {Lei, Yunwen and Tang, Ke},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Composite Mirror Descent: Optimal Bounds with High Probabilities},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8c6744c9d42ec2cb9e8885b54ff744d0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8c6744c9d42ec2cb9e8885b54ff744d0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8c6744c9d42ec2cb9e8885b54ff744d0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8c6744c9d42ec2cb9e8885b54ff744d0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8c6744c9d42ec2cb9e8885b54ff744d0-Reviews.html", "metareview": "", "pdf_size": 443901, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4254337277773860510&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Shenzhen Key Laboratory of Computational Intelligence, Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen 518055, China; Shenzhen Key Laboratory of Computational Intelligence, Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen 518055, China", "aff_domain": "sustc.edu.cn;sustc.edu.cn", "email": "sustc.edu.cn;sustc.edu.cn", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8c6744c9d42ec2cb9e8885b54ff744d0-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Southern University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.sustech.edu.cn", "aff_unique_abbr": "SUSTech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Stochastic Cubic Regularization for Fast Nonconvex Optimization", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11296", "id": "11296", "author_site": "Nilesh Tripuraneni, Mitchell Stern, Chi Jin, Jeffrey Regier, Michael Jordan", "author": "Nilesh Tripuraneni; Mitchell Stern; Chi Jin; Jeffrey Regier; Michael I Jordan", "abstract": "This paper proposes a stochastic variant of a classic algorithm---the cubic-regularized Newton method [Nesterov and Polyak]. The proposed algorithm efficiently escapes saddle points and finds approximate local minima for general smooth, nonconvex functions in only $\\mathcal{\\tilde{O}}(\\epsilon^{-3.5})$ stochastic gradient and stochastic Hessian-vector product evaluations. The latter can be computed as efficiently as stochastic gradients. This improves upon the $\\mathcal{\\tilde{O}}(\\epsilon^{-4})$ rate of stochastic gradient descent. Our rate matches the best-known result for finding local minima without requiring any delicate acceleration or variance-reduction techniques.", "bibtex": "@inproceedings{NEURIPS2018_db191505,\n author = {Tripuraneni, Nilesh and Stern, Mitchell and Jin, Chi and Regier, Jeffrey and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Cubic Regularization for Fast Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/db1915052d15f7815c8b88e879465a1e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/db1915052d15f7815c8b88e879465a1e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/db1915052d15f7815c8b88e879465a1e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/db1915052d15f7815c8b88e879465a1e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/db1915052d15f7815c8b88e879465a1e-Reviews.html", "metareview": "", "pdf_size": 612962, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11681872854579121081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;cs.berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/db1915052d15f7815c8b88e879465a1e-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Expectation Maximization with Variance Reduction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11764", "id": "11764", "author_site": "Jianfei Chen, Jun Zhu, Yee Whye Teh, Tong Zhang", "author": "Jianfei Chen; Jun Zhu; Yee Whye Teh; Tong Zhang", "abstract": "Expectation-Maximization (EM) is a popular tool for learning latent variable models, but the vanilla batch EM does not scale to large data sets because the whole data set is needed at every E-step. Stochastic Expectation Maximization (sEM) reduces the cost of E-step by stochastic approximation. However, sEM has a slower asymptotic convergence rate than batch EM, and requires a decreasing sequence of step sizes, which is difficult to tune. In this paper, we propose a variance reduced stochastic EM (sEM-vr) algorithm inspired by variance reduced stochastic gradient descent algorithms. We show that sEM-vr has the same exponential asymptotic convergence rate as batch EM. Moreover, sEM-vr only requires a constant step size to achieve this rate, which alleviates the burden of parameter tuning. We compare sEM-vr with batch EM, sEM and other algorithms on Gaussian mixture models and probabilistic latent semantic analysis, and sEM-vr converges significantly faster than these baselines.", "bibtex": "@inproceedings{NEURIPS2018_aba22f74,\n author = {Chen, Jianfei and Zhu, Jun and Teh, Yee Whye and Zhang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Expectation Maximization with Variance Reduction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aba22f748b1a6dff75bda4fd1ee9fe07-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aba22f748b1a6dff75bda4fd1ee9fe07-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aba22f748b1a6dff75bda4fd1ee9fe07-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aba22f748b1a6dff75bda4fd1ee9fe07-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aba22f748b1a6dff75bda4fd1ee9fe07-Reviews.html", "metareview": "", "pdf_size": 566649, "gs_citation": 70, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12720350297797262569&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Dept. of Comp. Sci. & Tech., BNRist Center, State Key Lab for Intell. Tech. & Sys., Institute for AI, THBI Lab, Tsinghua University, Beijing, 100084, China; Dept. of Comp. Sci. & Tech., BNRist Center, State Key Lab for Intell. Tech. & Sys., Institute for AI, THBI Lab, Tsinghua University, Beijing, 100084, China; Department of Statistics, University of Oxford; Tencent AI Lab", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;stats.ox.ac.uk;tongzhang-ml.org", "email": "mails.tsinghua.edu.cn;tsinghua.edu.cn;stats.ox.ac.uk;tongzhang-ml.org", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aba22f748b1a6dff75bda4fd1ee9fe07-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Tsinghua University;University of Oxford;Tencent", "aff_unique_dep": "Department of Computer Science and Technology;Department of Statistics;Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ox.ac.uk;https://ai.tencent.com", "aff_unique_abbr": "THU;Oxford;Tencent AI Lab", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Beijing;Oxford;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "title": "Stochastic Nested Variance Reduction for Nonconvex Optimization", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11390", "id": "11390", "author_site": "Dongruo Zhou, Pan Xu, Quanquan Gu", "author": "Dongruo Zhou; Pan Xu; Quanquan Gu", "abstract": "We study finite-sum nonconvex optimization problems, where the objective function is an average of $n$ nonconvex functions. We propose a new stochastic gradient descent algorithm based on nested variance reduction. Compared with conventional stochastic variance reduced gradient (SVRG) algorithm that uses two reference points to construct a semi-stochastic gradient with diminishing variance in each iteration, our algorithm uses $K+1$ nested reference points to build a semi-stochastic gradient to further reduce its variance in each iteration. For smooth nonconvex functions, the proposed algorithm converges to an $\\epsilon$-approximate first-order stationary point (i.e., $\\|\\nabla F(\\mathbf{x})\\|_2\\leq \\epsilon$) within $\\tilde O(n\\land \\epsilon^{-2}+\\epsilon^{-3}\\land n^{1/2}\\epsilon^{-2})$\\footnote{$\\tilde O(\\cdot)$ hides the logarithmic factors, and $a\\land b$ means $\\min(a,b)$.} number of stochastic gradient evaluations. 
This improves the best known gradient complexity of SVRG $O(n+n^{2/3}\\epsilon^{-2})$ and that of SCSG $O(n\\land \\epsilon^{-2}+\\epsilon^{-10/3}\\land n^{2/3}\\epsilon^{-2})$. For gradient dominated functions, our algorithm also achieves better gradient complexity than the state-of-the-art algorithms. Thorough experimental results on different nonconvex optimization problems back up our theory.", "bibtex": "@inproceedings{NEURIPS2018_136f9513,\n author = {Zhou, Dongruo and Xu, Pan and Gu, Quanquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Nested Variance Reduction for Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/136f951362dab62e64eb8e841183c2a9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/136f951362dab62e64eb8e841183c2a9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/136f951362dab62e64eb8e841183c2a9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/136f951362dab62e64eb8e841183c2a9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/136f951362dab62e64eb8e841183c2a9-Reviews.html", "metareview": "", "pdf_size": 3152397, "gs_citation": 236, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15876359094772698292&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "aff": "Department of Computer Science, University of California, Los Angeles; Department of Computer Science, University of California, Los Angeles; Department of Computer Science, University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "email": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/136f951362dab62e64eb8e841183c2a9-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Nonparametric Event-Tensor Decomposition", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11661", "id": "11661", "author_site": "Shandian Zhe, Yishuai Du", "author": "Shandian Zhe; Yishuai Du", "abstract": "Tensor decompositions are fundamental tools for multiway data analysis. Existing approaches, however, ignore the valuable temporal information along with data, or simply discretize them into time steps so that important temporal patterns are easily missed. Moreover, most methods are limited to multilinear decomposition forms, and hence are unable to capture intricate, nonlinear relationships in data. To address these issues, we formulate event-tensors, to preserve the complete temporal information for multiway data, and propose a novel Bayesian nonparametric decomposition model. 
Our model can (1) fully exploit the time stamps to capture the critical, causal/triggering effects between the interaction events, (2) flexibly estimate the complex relationships between the entities in tensor modes, and (3) uncover hidden structures from their temporal interactions. For scalable inference, we develop a doubly stochastic variational Expectation-Maximization algorithm to conduct an online decomposition. Evaluations on both synthetic and real-world datasets show that our model not only improves upon the predictive performance of existing methods, but also discovers interesting clusters underlying the data.", "bibtex": "@inproceedings{NEURIPS2018_61f2585b,\n author = {Zhe, Shandian and Du, Yishuai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Nonparametric Event-Tensor Decomposition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/61f2585b0ebcf1f532c4d1ec9a7d51aa-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/61f2585b0ebcf1f532c4d1ec9a7d51aa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/61f2585b0ebcf1f532c4d1ec9a7d51aa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/61f2585b0ebcf1f532c4d1ec9a7d51aa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/61f2585b0ebcf1f532c4d1ec9a7d51aa-Reviews.html", "metareview": "", "pdf_size": 3002965, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9009768239391533592&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "School of Computing, University of Utah; School of Computing, University of Utah", "aff_domain": "cs.utah.edu;utah.edu", "email": "cs.utah.edu;utah.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/61f2585b0ebcf1f532c4d1ec9a7d51aa-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "School of Computing", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "U of U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Utah", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Primal-Dual Method for Empirical Risk Minimization with O(1) Per-Iteration Complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11800", "id": "11800", "author_site": "Conghui Tan, Tong Zhang, Shiqian Ma, Ji Liu", "author": "Conghui Tan; Tong Zhang; Shiqian Ma; Ji Liu", "abstract": "The regularized empirical risk minimization problem with a linear predictor appears frequently in machine learning. In this paper, we propose a new stochastic primal-dual method to solve this class of problems. Different from existing methods, our proposed method requires only O(1) operations in each iteration. We also develop a variance-reduction variant of the algorithm that converges linearly. Numerical experiments suggest that our methods are faster than existing ones such as proximal SGD, SVRG and SAGA on high-dimensional problems.", "bibtex": "@inproceedings{NEURIPS2018_08048a9c,\n author = {Tan, Conghui and Zhang, Tong and Ma, Shiqian and Liu, Ji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Primal-Dual Method for Empirical Risk Minimization with O(1) Per-Iteration Complexity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08048a9c5630ccb67789a198f35d30ec-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08048a9c5630ccb67789a198f35d30ec-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/08048a9c5630ccb67789a198f35d30ec-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08048a9c5630ccb67789a198f35d30ec-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08048a9c5630ccb67789a198f35d30ec-Reviews.html", "metareview": "", "pdf_size": 415028, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1128355606979398532&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "The Chinese University of Hong Kong; Tencent AI Lab; University of California, Davis; Tencent AI Lab + University of Rochester", "aff_domain": "se.cuhk.edu.hk;tongzhang-ml.org;math.ucdavis.edu;gmail.com", "email": "se.cuhk.edu.hk;tongzhang-ml.org;math.ucdavis.edu;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08048a9c5630ccb67789a198f35d30ec-Abstract.html", "aff_unique_index": "0;1;2;1+3", "aff_unique_norm": "Chinese University of Hong Kong;Tencent;University of California, Davis;University of Rochester", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://ai.tencent.com;https://www.ucdavis.edu;https://www.rochester.edu", "aff_unique_abbr": "CUHK;Tencent AI Lab;UC Davis;U of R", "aff_campus_unique_index": "0;2;", "aff_campus_unique": "Hong Kong SAR;;Davis", "aff_country_unique_index": "0;0;1;0+1", "aff_country_unique": "China;United States" }, { "title": "Stochastic Spectral and Conjugate Descent Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11338", "id": "11338", "author_site": "Dmitry Kovalev, Peter Richtarik, Eduard Gorbunov, Elnur Gasanov", "author": "Dmitry Kovalev; Peter Richtarik; Eduard Gorbunov; Elnur Gasanov", "abstract": "The state-of-the-art methods for solving optimization problems in high dimensions are variants of randomized coordinate descent (RCD). In this paper we introduce a fundamentally new type of acceleration strategy for RCD based on the augmentation of the set of coordinate directions by a few spectral or conjugate directions. As we increase the number of extra directions to be sampled from, the rate of the method improves, and interpolates between the linear rate of RCD and a linear rate independent of the condition number. We also develop and analyze inexact variants of these methods, where the spectral and conjugate directions are allowed to be only approximate. We motivate the above development by proving several negative results which highlight the limitations of RCD with importance sampling.", "bibtex": "@inproceedings{NEURIPS2018_e721a54a,\n author = {Kovalev, Dmitry and Richtarik, Peter and Gorbunov, Eduard and Gasanov, Elnur},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Spectral and Conjugate Descent Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e721a54a8cf18c8543d44782d9ef681f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e721a54a8cf18c8543d44782d9ef681f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e721a54a8cf18c8543d44782d9ef681f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e721a54a8cf18c8543d44782d9ef681f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e721a54a8cf18c8543d44782d9ef681f-Reviews.html", "metareview": "", "pdf_size": 939828, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15120261814030278283&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e721a54a8cf18c8543d44782d9ef681f-Abstract.html" }, { "title": "Streaming Kernel PCA with $\\tilde{O}(\\sqrt{n})$ Random Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11704", "id": "11704", "author_site": "Enayat Ullah, Poorya Mianjy, Teodor Vanislavov Marinov, Raman Arora", "author": "Enayat Ullah; Poorya Mianjy; Teodor Vanislavov Marinov; Raman Arora", "abstract": "We study the statistical and computational aspects of kernel principal component analysis using random Fourier features and show that under mild assumptions, $O(\\sqrt{n} \\log n)$ features suffice to achieve $O(1/\\epsilon^2)$ sample complexity. Furthermore, we give a memory-efficient streaming algorithm, based on Oja's classical algorithm, that achieves this rate.", "bibtex": "@inproceedings{NEURIPS2018_7ae11af2,\n author = {Ullah, Enayat and Mianjy, Poorya and Marinov, Teodor Vanislavov and Arora, Raman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Streaming Kernel PCA with $\\tilde{O}(\\sqrt{n})$ Random Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7ae11af20803185120e83d3ce4fb4ed7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7ae11af20803185120e83d3ce4fb4ed7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7ae11af20803185120e83d3ce4fb4ed7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7ae11af20803185120e83d3ce4fb4ed7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7ae11af20803185120e83d3ce4fb4ed7-Reviews.html", "metareview": "", "pdf_size": 423427, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Department of Computer Science, Johns Hopkins University; Department of Computer Science, Johns Hopkins University; Department of Computer Science, Johns Hopkins University; Department of Computer Science, Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jhu.edu;cs.jhu.edu", "email": "jhu.edu;jhu.edu;jhu.edu;cs.jhu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7ae11af20803185120e83d3ce4fb4ed7-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Streamlining Variational Inference for Constraint Satisfaction Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11998", "id": "11998", "author_site": "Aditya Grover, Tudor Achim, Stefano Ermon", "author": "Aditya Grover; Tudor Achim; Stefano Ermon", "abstract": "Several algorithms for solving constraint satisfaction problems are based on survey propagation, a variational inference scheme used to obtain approximate marginal probability estimates for variable assignments. These marginals correspond to how frequently each variable is set to true among satisfying assignments, and are used to inform branching decisions during search; however, marginal estimates obtained via survey propagation are approximate and can be self-contradictory. We introduce a more general branching strategy based on streamlining constraints, which sidestep hard assignments to variables. We show that streamlined solvers consistently outperform decimation-based solvers on random k-SAT instances for several problem sizes, shrinking the gap between empirical performance and theoretical limits of satisfiability by 16.3% on average for k = 3, 4, 5, 6.", "bibtex": "@inproceedings{NEURIPS2018_02ed8122,\n author = {Grover, Aditya and Achim, Tudor and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Streamlining Variational Inference for Constraint Satisfaction Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/02ed812220b0705fabb868ddbf17ea20-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/02ed812220b0705fabb868ddbf17ea20-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/02ed812220b0705fabb868ddbf17ea20-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/02ed812220b0705fabb868ddbf17ea20-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/02ed812220b0705fabb868ddbf17ea20-Reviews.html", "metareview": "", "pdf_size": 2344367, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9129978297441572165&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Stanford University; Computer Science Department, Stanford University; Computer Science Department, Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/02ed812220b0705fabb868ddbf17ea20-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Structural Causal Bandits: Where to Intervene?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11265", "id": "11265", "author_site": "Sanghack Lee, Elias Bareinboim", "author": "Sanghack Lee; Elias Bareinboim", "abstract": "We study the problem of identifying the best action in a sequential decision-making setting when the reward distributions of the arms exhibit a non-trivial dependence structure, which is governed by the underlying causal model of the domain where the agent is deployed. In this setting, playing an arm corresponds to intervening on a set of variables and setting them to specific values. In this paper, we show that whenever the underlying causal model is not taken into account during the decision-making process, the standard strategies of simultaneously intervening on all variables or on all the subsets of the variables may, in general, lead to suboptimal policies, regardless of the number of interventions performed by the agent in the environment. We formally acknowledge this phenomenon and investigate structural properties implied by the underlying causal model, which lead to a complete characterization of the relationships between the arms' distributions. We leverage this characterization to build a new algorithm that takes as input a causal structure and finds a minimal, sound, and complete set of qualified arms that an agent should play to maximize its expected reward. We empirically demonstrate that the new strategy learns an optimal policy and leads to orders of magnitude faster convergence rates when compared with its causal-insensitive counterparts.", "bibtex": "@inproceedings{NEURIPS2018_c0a271bc,\n author = {Lee, Sanghack and Bareinboim, Elias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. 
Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structural Causal Bandits: Where to Intervene?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c0a271bc0ecb776a094786474322cb82-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c0a271bc0ecb776a094786474322cb82-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c0a271bc0ecb776a094786474322cb82-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c0a271bc0ecb776a094786474322cb82-Reviews.html", "metareview": "", "pdf_size": 1186178, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4413359648093381122&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Purdue University; Department of Computer Science, Purdue University", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c0a271bc0ecb776a094786474322cb82-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Structure-Aware Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11029", "id": "11029", "author_site": "Jianlong Chang, Jie Gu, Lingfeng Wang, GAOFENG MENG, SHIMING XIANG, Chunhong Pan", "author": "Jianlong Chang; Jie Gu; Lingfeng Wang; GAOFENG MENG; SHIMING XIANG; Chunhong Pan", "abstract": "Convolutional neural networks (CNNs) are inherently subject to invariable filters that can only aggregate local inputs with the same topological structures. As a result, CNNs can only manage data with Euclidean or grid-like structures (e.g., images), not data with non-Euclidean or graph structures (e.g., traffic networks). To broaden the reach of CNNs, we develop structure-aware convolution to eliminate the invariance, yielding a unified mechanism for dealing with both Euclidean and non-Euclidean structured data. Technically, filters in the structure-aware convolution are generalized to univariate functions, which are capable of aggregating local inputs with diverse topological structures. Since infinitely many parameters would be required to determine a univariate function, we parameterize these filters with a finite number of learnable parameters in the context of function approximation theory. By replacing the classical convolution in CNNs with the structure-aware convolution, Structure-Aware Convolutional Neural Networks (SACNNs) are readily established. Extensive experiments on eleven datasets provide strong evidence that SACNNs outperform current models on various machine learning tasks, including image classification and clustering, text categorization, skeleton-based action recognition, molecular activity detection, and taxi flow prediction.", "bibtex": "@inproceedings{NEURIPS2018_182be0c5,\n author = {Chang, Jianlong and Gu, Jie and Wang, Lingfeng and MENG, GAOFENG and XIANG, SHIMING and Pan, Chunhong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. 
Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structure-Aware Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/182be0c5cdcd5072bb1864cdee4d3d6e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/182be0c5cdcd5072bb1864cdee4d3d6e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/182be0c5cdcd5072bb1864cdee4d3d6e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/182be0c5cdcd5072bb1864cdee4d3d6e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/182be0c5cdcd5072bb1864cdee4d3d6e-Reviews.html", "metareview": "", "pdf_size": 1428457, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15143914212740363018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "NLPR, Institute of Automation, Chinese Academy of Sciences+School of Artificial Intelligence, University of Chinese Academy of Sciences; NLPR, Institute of Automation, Chinese Academy of Sciences+School of Artificial Intelligence, University of Chinese Academy of Sciences; NLPR, Institute of Automation, Chinese Academy of Sciences; NLPR, Institute of Automation, Chinese Academy of Sciences; NLPR, Institute of Automation, Chinese Academy of Sciences+School of Artificial Intelligence, University of Chinese Academy of Sciences; NLPR, Institute of Automation, Chinese Academy of Sciences", "aff_domain": "nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "email": "nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn;nlpr.ia.ac.cn", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/182be0c5cdcd5072bb1864cdee4d3d6e-Abstract.html", "aff_unique_index": "0+1;0+1;0;0;0+1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;School of Artificial Intelligence", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0;0;0+0;0", "aff_country_unique": "China" }, { "title": "Structured Local Minima in Sparse Blind Deconvolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11242", "id": "11242", "author_site": "Yuqian Zhang, Han-wen Kuo, John Wright", "author": "Yuqian Zhang; Han-wen Kuo; John Wright", "abstract": "Blind deconvolution is the ubiquitous problem of recovering two unknown signals from their convolution. Unfortunately, this is an ill-posed problem in general. This paper focuses on the {\\em short and sparse} blind deconvolution problem, where one unknown signal is short and the other is sparsely and randomly supported. This variant captures the structure of the unknown signals in several important applications. We assume the short signal to have unit $\\ell^2$ norm and cast the blind deconvolution problem as a nonconvex optimization problem over the sphere. 
We demonstrate that (i) in a certain region of the sphere, every local optimum is close to some shift truncation of the ground truth, and (ii) for a generic short signal of length $k$, when the sparsity of the activation signal satisfies $\\theta \\lesssim k^{-2/3}$ and the number of measurements satisfies $m \\gtrsim \\mathrm{poly}(k)$, a simple initialization method together with a descent algorithm which escapes strict saddle points recovers a near shift truncation of the ground truth kernel.", "bibtex": "@inproceedings{NEURIPS2018_1e1d1841,\n author = {Zhang, Yuqian and Kuo, Han-wen and Wright, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structured Local Minima in Sparse Blind Deconvolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1e1d184167ca7676cf665225e236a3d2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1e1d184167ca7676cf665225e236a3d2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1e1d184167ca7676cf665225e236a3d2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1e1d184167ca7676cf665225e236a3d2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1e1d184167ca7676cf665225e236a3d2-Reviews.html", "metareview": "", "pdf_size": 820191, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4864741555441297464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering and Data Science Institute, Columbia University, New York, NY 10027; Department of Electrical Engineering and Data Science Institute, Columbia University, New York, NY 10027; Department of Electrical Engineering and Data Science Institute, Columbia University, New York, NY 10027", "aff_domain": "columbia.edu;columbia.edu;columbia.edu", "email": "columbia.edu;columbia.edu;columbia.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1e1d184167ca7676cf665225e236a3d2-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "Department of Electrical Engineering and Data Science Institute", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sublinear Time Low-Rank Approximation of Distance Matrices", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11377", "id": "11377", "author_site": "Ainesh Bakshi, David Woodruff", "author": "Ainesh Bakshi; David Woodruff", "abstract": "Let $P = \\{ p_1, p_2, \\ldots, p_n \\}$ and $Q = \\{ q_1, q_2, \\ldots, q_m \\}$ be two point sets in an arbitrary metric space. Let $A$ represent the $m\\times n$ pairwise distance matrix with $A_{i,j} = d(p_i, q_j)$. Such distance matrices are commonly computed in software packages and have applications to learning image manifolds, handwriting recognition, and multi-dimensional unfolding, among other things. In an attempt to reduce their description size, we study low rank approximation of such matrices. 
Our main result is to show that for any underlying distance metric $d$, it is possible to achieve an additive error low rank approximation in sublinear time. We note that it is provably impossible to achieve such a guarantee in sublinear time for arbitrary matrices $A$, and our proof exploits special properties of distance matrices. We develop a recursive algorithm based on additive projection-cost preserving sampling. We then show that in general, relative error approximation in sublinear time is impossible for distance matrices, even if one allows for bicriteria solutions. Additionally, we show that if $P = Q$ and $d$ is the squared Euclidean distance, which is not a metric but rather the square of a metric, then a relative error bicriteria solution can be found in sublinear time. Finally, we empirically compare our algorithm with the SVD and input-sparsity time algorithms. Our algorithm is several hundred times faster than the SVD, and about $8$-$20$ times faster than input-sparsity methods on real-world and synthetic datasets of size $10^8$. Accuracy-wise, our algorithm is only slightly worse than the SVD (which is optimal) and the input-sparsity time algorithms.", "bibtex": "@inproceedings{NEURIPS2018_c4500821,\n author = {Bakshi, Ainesh and Woodruff, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sublinear Time Low-Rank Approximation of Distance Matrices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c45008212f7bdf6eab6050c2a564435a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c45008212f7bdf6eab6050c2a564435a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c45008212f7bdf6eab6050c2a564435a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c45008212f7bdf6eab6050c2a564435a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c45008212f7bdf6eab6050c2a564435a-Reviews.html", "metareview": "", "pdf_size": 486741, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18286664270887945255&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, Carnegie Mellon University; Department of Computer Science, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c45008212f7bdf6eab6050c2a564435a-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Submodular Field Grammars: Representation, Inference, and Application to Image Parsing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11426", "id": "11426", "author_site": "Abram Friesen, Pedro Domingos", "author": "Abram L. 
Friesen; Pedro M Domingos", "abstract": "Natural scenes contain many layers of part-subpart structure, and distributions over them are thus naturally represented by stochastic image grammars, with one production per decomposition of a part. Unfortunately, in contrast to language grammars, where the number of possible split points for a production $A \\rightarrow BC$ is linear in the length of $A$, in an image there are an exponential number of ways to split a region into subregions. This makes parsing intractable and requires image grammars to be severely restricted in practice, for example by allowing only rectangular regions. In this paper, we address this problem by associating with each production a submodular Markov random field whose labels are the subparts and whose labeling segments the current object into these subparts. We call the result a submodular field grammar (SFG). Finding the MAP split of a region into subregions is now tractable, and by exploiting this we develop an efficient approximate algorithm for MAP parsing of images with SFGs. Empirically, we present promising improvements in accuracy when using SFGs for scene understanding, and show exponential improvements in inference time compared to traditional methods, while returning comparable minima.", "bibtex": "@inproceedings{NEURIPS2018_c5866e93,\n author = {Friesen, Abram L and Domingos, Pedro M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Submodular Field Grammars: Representation, Inference, and Application to Image Parsing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c5866e93cab1776890fe343c9e7063fb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c5866e93cab1776890fe343c9e7063fb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c5866e93cab1776890fe343c9e7063fb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c5866e93cab1776890fe343c9e7063fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c5866e93cab1776890fe343c9e7063fb-Reviews.html", "metareview": "", "pdf_size": 1216880, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8876899760491089326&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Paul G. Allen School of Computer Science and Engineering, University of Washington; Paul G. Allen School of Computer Science and Engineering, University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu", "email": "cs.washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c5866e93cab1776890fe343c9e7063fb-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Paul G. 
Allen School of Computer Science and Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Submodular Maximization via Gradient Ascent: The Case of Deep Submodular Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11765", "id": "11765", "author_site": "Wenruo Bai, William Stafford Noble, Jeffrey A Bilmes", "author": "Wenruo Bai; William Stafford Noble; Jeff A. Bilmes", "abstract": "We study the problem of maximizing deep submodular functions (DSFs) subject to a matroid constraint. DSFs are an expressive class of submodular functions that include, as strict subfamilies, the facility location, weighted coverage, and sums of concave composed with modular functions. We use a strategy similar to the continuous greedy approach, but we show that the multilinear extension of any DSF has a natural and computationally attainable concave relaxation that we can optimize using gradient ascent. Our results show a guarantee of $\\max_{0<\\delta<1}(1-\\epsilon-\\delta-e^{-\\delta^2\\Omega(k)})$ with a running time of $O(\\nicefrac{n^2}{\\epsilon^2})$ plus time for pipage rounding\nto recover a discrete solution, where $k$ is the rank of the matroid constraint. This bound is often better than the standard $1-1/e$ guarantee of the continuous greedy algorithm, but runs much faster. Our bound also holds even for fully curved ($c=1$) functions where the guarantee of $1-c/e$ degenerates to $1-1/e$ where $c$ is the curvature of $f$. We perform computational experiments that support our theoretical results.", "bibtex": "@inproceedings{NEURIPS2018_b43a6403,\n author = {Bai, Wenruo and Stafford Noble, William and Bilmes, Jeff A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Submodular Maximization via Gradient Ascent: The Case of Deep Submodular Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b43a6403c17870707ca3c44984a2da22-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b43a6403c17870707ca3c44984a2da22-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b43a6403c17870707ca3c44984a2da22-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b43a6403c17870707ca3c44984a2da22-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b43a6403c17870707ca3c44984a2da22-Reviews.html", "metareview": "", "pdf_size": 744225, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10454451826432139623&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Depts. of Electrical & Computer Engineering\u2021; Depts. of Electrical & Computer Engineering\u2021, Computer Science and Engineering$; Depts. 
of Electrical & Computer Engineering\u2021, Computer Science and Engineering$, and Genome Sciences\u2217", "aff_domain": "uw.edu;uw.edu;uw.edu", "email": "uw.edu;uw.edu;uw.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b43a6403c17870707ca3c44984a2da22-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Electrical & Computer Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Supervised autoencoders: Improving generalization performance with unsupervised regularizers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11038", "id": "11038", "author_site": "Lei Le, Andrew Patterson, Martha White", "author": "Lei Le; Andrew Patterson; Martha White", "abstract": "Generalization performance is a central goal in machine learning, particularly when learning representations with large neural networks. A common strategy to improve generalization has been through the use of regularizers, typically as a norm constraining the parameters. Regularizing hidden layers in a neural network architecture, however, is not straightforward. There have been a few effective layer-wise suggestions, but without theoretical guarantees for improved performance. In this work, we theoretically and empirically analyze one such model, called a supervised auto-encoder: a neural network that predicts both inputs (reconstruction error) and targets jointly. We provide a novel generalization result for linear auto-encoders, proving uniform stability based on the inclusion of the reconstruction error---particularly as an improvement on simplistic regularization such as norms or even on more advanced regularizations such as the use of auxiliary tasks. Empirically, we then demonstrate that, across an array of architectures with different numbers of hidden units and activation functions, the supervised auto-encoder, compared to the corresponding standard neural network, never harms performance and can significantly improve generalization.", "bibtex": "@inproceedings{NEURIPS2018_2a38a4a9,\n author = {Le, Lei and Patterson, Andrew and White, Martha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Supervised autoencoders: Improving generalization performance with unsupervised regularizers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2a38a4a9316c49e5a833517c45d31070-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2a38a4a9316c49e5a833517c45d31070-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2a38a4a9316c49e5a833517c45d31070-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2a38a4a9316c49e5a833517c45d31070-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2a38a4a9316c49e5a833517c45d31070-Reviews.html", "metareview": "", "pdf_size": 801963, "gs_citation": 332, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16640119433918017868&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science, Indiana University; Department of Computing Science, University of Alberta; Department of Computing Science, University of Alberta", "aff_domain": "iu.edu;ualberta.ca;ualberta.ca", "email": "iu.edu;ualberta.ca;ualberta.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2a38a4a9316c49e5a833517c45d31070-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Indiana University;University of Alberta", "aff_unique_dep": "Department of Computer Science;Department of Computing Science", "aff_unique_url": "https://www.indiana.edu;https://www.ualberta.ca", "aff_unique_abbr": "IU;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Canada" }, { "title": "Supervising Unsupervised Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11489", "id": "11489", "author_site": "Vikas Garg, Adam Kalai", "author": "Vikas Garg; Adam T Kalai", "abstract": "We introduce a framework to transfer knowledge acquired from a repository of (heterogeneous) supervised datasets to new unsupervised datasets. Our perspective avoids the subjectivity inherent in unsupervised learning by reducing it to supervised learning, and provides a principled way to evaluate unsupervised algorithms. We demonstrate the versatility of our framework via rigorous agnostic bounds on a variety of unsupervised problems. In the context of clustering, our approach helps choose the number of clusters and the clustering algorithm, remove the outliers, and provably circumvent Kleinberg's impossibility result. Experiments across hundreds of problems demonstrate improvements in performance on unsupervised data with simple algorithms despite the fact that our problems come from heterogeneous domains. Additionally, our framework lets us leverage deep networks to learn common features across many small datasets, and perform zero-shot learning.", "bibtex": "@inproceedings{NEURIPS2018_72e6d323,\n author = {Garg, Vikas and Kalai, Adam T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Supervising Unsupervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/72e6d3238361fe70f22fb0ac624a7072-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/72e6d3238361fe70f22fb0ac624a7072-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/72e6d3238361fe70f22fb0ac624a7072-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/72e6d3238361fe70f22fb0ac624a7072-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/72e6d3238361fe70f22fb0ac624a7072-Reviews.html", "metareview": "", "pdf_size": 454097, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12424323365238581208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "CSAIL, MIT; Microsoft Research", "aff_domain": "csail.mit.edu;microsoft.com", "email": "csail.mit.edu;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/72e6d3238361fe70f22fb0ac624a7072-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;Microsoft Research", "aff_unique_url": "https://www.csail.mit.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MIT;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Support Recovery for Orthogonal Matching Pursuit: Upper and Lower bounds", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12022", "id": "12022", "author_site": "Raghav Somani, Chirag Gupta, Prateek Jain, Praneeth Netrapalli", "author": "Raghav Somani; Chirag Gupta; Prateek Jain; Praneeth Netrapalli", "abstract": "This paper studies the problem of sparse regression where the goal is to learn a sparse vector that best optimizes a given objective function. Under the assumption that the objective function satisfies restricted strong convexity (RSC), we analyze orthogonal matching pursuit (OMP), a greedy algorithm that is used heavily in applications, and obtain a support recovery result as well as a tight generalization error bound for OMP. Furthermore, we obtain lower bounds for OMP, showing that both our results on support recovery and generalization error are tight up to logarithmic factors. To the best of our knowledge, these support recovery and generalization bounds are the first such matching upper and lower bounds (up to logarithmic factors) for {\em any} sparse regression algorithm under the RSC assumption.", "bibtex": "@inproceedings{NEURIPS2018_84b64e53,\n author = {Somani, Raghav and Gupta, Chirag and Jain, Prateek and Netrapalli, Praneeth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Support Recovery for Orthogonal Matching Pursuit: Upper and Lower bounds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/84b64e537f08e81b8dea8cce972a28b2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/84b64e537f08e81b8dea8cce972a28b2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/84b64e537f08e81b8dea8cce972a28b2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/84b64e537f08e81b8dea8cce972a28b2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/84b64e537f08e81b8dea8cce972a28b2-Reviews.html", "metareview": "", "pdf_size": 509322, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6088235144388534838&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Microsoft Research, India; Machine Learning Department, Carnegie Mellon University + Microsoft Research, India; Microsoft Research, India; Microsoft Research, India", "aff_domain": "microsoft.com;andrew.cmu.edu;microsoft.com;microsoft.com", "email": "microsoft.com;andrew.cmu.edu;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/84b64e537f08e81b8dea8cce972a28b2-Abstract.html", "aff_unique_index": "0;1+0;0;0", "aff_unique_norm": "Microsoft;Carnegie Mellon University", "aff_unique_dep": "Microsoft Research;Machine Learning Department", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/india.aspx;https://www.cmu.edu", "aff_unique_abbr": "MSR India;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0;0;0", "aff_country_unique": "India;United States" }, { "title": "Symbolic Graph Reasoning Meets Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11198", "id": "11198", "author_site": "Xiaodan Liang, Zhiting Hu, Hao Zhang, Liang Lin, Eric Xing", "author": "Xiaodan Liang; Zhiting Hu; Hao Zhang; Liang Lin; Eric P Xing", "abstract": "Beyond local convolution networks, we explore how to harness various external human knowledge for endowing the networks with the capability of semantic global reasoning. Rather than using separate graphical models (e.g. CRF) or constraints for modeling broader dependencies, we propose a new Symbolic Graph Reasoning (SGR) layer, which performs reasoning over a group of symbolic nodes whose outputs explicitly represent different properties of each semantic in a prior knowledge graph. To cooperate with local convolutions, each SGR is constituted by three modules: a) a primal local-to-semantic voting module where the features of all symbolic nodes are generated by voting from local representations; b) a graph reasoning module propagates information over the knowledge graph to achieve global semantic coherency; c) a dual semantic-to-local mapping module learns new associations of the evolved symbolic nodes with local representations, and accordingly enhances local features. The SGR layer can be injected between any convolution layers and instantiated with distinct prior graphs. Extensive experiments show that incorporating SGR significantly improves plain ConvNets on three semantic segmentation tasks and one image classification task. 
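The three SGR modules just listed for "Symbolic Graph Reasoning Meets Convolutions" map naturally onto a few matrix operations. Below is a minimal, hedged sketch of one SGR-style forward pass in plain numpy; all shapes, the softmax voting, and the residual mapping are illustrative assumptions, not the authors' exact layer.

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
L, D = 64, 32                 # number of local features, feature dim
S, Ds = 10, 16                # number of symbolic nodes, symbolic dim
X = rng.normal(size=(L, D))   # local convolutional features (flattened H*W)
A = rng.random((S, S)); A /= A.sum(1, keepdims=True)  # prior knowledge-graph adjacency

# a) local-to-semantic voting: each symbolic node gathers local features.
W_vote = rng.normal(size=(D, S)) * 0.1
votes = softmax(X @ W_vote, axis=0)     # (L, S) voting weights per node
H = votes.T @ X                         # (S, D) symbolic node features

# b) graph reasoning: propagate node features over the knowledge graph.
W_g = rng.normal(size=(D, Ds)) * 0.1
H = np.tanh(A @ H @ W_g)                # (S, Ds) evolved symbolic features

# c) semantic-to-local mapping: distribute evolved node features back.
W_map = rng.normal(size=(D, Ds)) * 0.1
assign = softmax(X @ W_map @ H.T, axis=1)   # (L, S) new local-to-node associations
X_out = X + assign @ (H @ W_map.T)          # residual enhancement of local features
print(X_out.shape)                          # (64, 32)
```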
More analyses show that the SGR layer learns shared symbolic representations for domains/datasets with different label sets, given a universal knowledge graph, demonstrating its superior generalization capability.", "bibtex": "@inproceedings{NEURIPS2018_cbb6a3b8,\n author = {Liang, Xiaodan and Hu, Zhiting and Zhang, Hao and Lin, Liang and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Symbolic Graph Reasoning Meets Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cbb6a3b884f4f88b3a8e3d44c636cbd8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cbb6a3b884f4f88b3a8e3d44c636cbd8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cbb6a3b884f4f88b3a8e3d44c636cbd8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cbb6a3b884f4f88b3a8e3d44c636cbd8-Reviews.html", "metareview": "", "pdf_size": 3391522, "gs_citation": 200, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13018684175844284790&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff": "School of Intelligent Systems Engineering, Sun Yat-sen University; Carnegie Mellon University; Carnegie Mellon University; School of Data and Computer Science, Sun Yat-sen University; Petuum Inc.", "aff_domain": "gmail.com;cs.cmu.edu;cs.cmu.edu;ieee.org;cs.cmu.edu", "email": "gmail.com;cs.cmu.edu;cs.cmu.edu;ieee.org;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cbb6a3b884f4f88b3a8e3d44c636cbd8-Abstract.html", "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Sun Yat-sen University;Carnegie Mellon University;Petuum Inc.", "aff_unique_dep": "School of Intelligent Systems Engineering;;", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.cmu.edu;https://www.petuum.com", "aff_unique_abbr": "SYSU;CMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "Synaptic Strength For Convolutional Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11962", "id": "11962", "author_site": "CHEN LIN, Zhao Zhong, Wu Wei, Junjie Yan", "author": "CHEN LIN; Zhao Zhong; Wu Wei; Junjie Yan", "abstract": "Convolutional Neural Networks (CNNs) are both computation- and memory-intensive, which has hindered their deployment on mobile devices. Inspired by the relevant concept in the neuroscience literature, we propose Synaptic Pruning: a data-driven method to prune connections between input and output feature maps with a newly proposed class of parameters called Synaptic Strength. Synaptic Strength is designed to capture the importance of a connection based on the amount of information it transports. Experimental results show the effectiveness of our approach. On CIFAR-10, we prune up to 96% of the connections for various CNN models, which results in significant size reduction and computation savings. Further evaluation on ImageNet demonstrates that synaptic pruning is able to discover efficient models that are competitive with state-of-the-art compact CNNs such as MobileNet-V2 and NasNet-Mobile. 
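A hedged sketch of the pruning rule described in the "Synaptic Strength" abstract above: score every input-to-output feature-map connection with a scalar strength and keep only the strongest few percent. The global magnitude threshold and the keep ratio are illustrative assumptions (the strengths here are random stand-ins for learned parameters).

```python
import numpy as np

rng = np.random.default_rng(0)
c_in, c_out = 64, 128
strength = np.abs(rng.normal(size=(c_out, c_in)))   # one scalar per connection

def prune_mask(strength, keep_ratio=0.04):
    """Keep only the top `keep_ratio` fraction of connections (e.g. 96% pruned)."""
    k = max(1, int(round(keep_ratio * strength.size)))
    thresh = np.partition(strength.ravel(), -k)[-k]
    return (strength >= thresh).astype(np.float32)

mask = prune_mask(strength, keep_ratio=0.04)
print(f"sparsity: {1 - mask.mean():.2%}")   # roughly 96% of connections removed
# At inference, a conv weight W of shape (c_out, c_in, kh, kw) would be
# multiplied by mask[:, :, None, None] to zero out the pruned connections.
```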
Our contributions are summarized as follows: (1) We introduce Synaptic Strength, a new class of parameters for CNNs that indicates the importance of each connection. (2) Our approach can prune various CNNs to high compression ratios without compromising accuracy. (3) Further investigation shows that the proposed Synaptic Strength is a better indicator for kernel pruning than the previous approach, in both empirical results and theoretical analysis.", "bibtex": "@inproceedings{NEURIPS2018_4d19b37a,\n author = {LIN, CHEN and Zhong, Zhao and Wei, Wu and Yan, Junjie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Synaptic Strength For Convolutional Neural Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4d19b37a2c399deace9082d464930022-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4d19b37a2c399deace9082d464930022-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4d19b37a2c399deace9082d464930022-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4d19b37a2c399deace9082d464930022-Reviews.html", "metareview": "", "pdf_size": 437396, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7764055171550260216&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "SenseTime Research; NLPR, CASIA+University of Chinese Academy of Sciences; SenseTime Research; SenseTime Research", "aff_domain": "sensetime.com;nlpr.ia.ac.cn;sensetime.com;sensetime.com", "email": "sensetime.com;nlpr.ia.ac.cn;sensetime.com;sensetime.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4d19b37a2c399deace9082d464930022-Abstract.html", "aff_unique_index": "0;1+2;0;0", "aff_unique_norm": "SenseTime;Chinese Academy of Sciences Institute of Automation;University of Chinese Academy of Sciences", "aff_unique_dep": "SenseTime Research;National Laboratory of Pattern Recognition;", "aff_unique_url": "https://www.sensetime.com;http://www.casia.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "SenseTime;CASIA;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0;0", "aff_country_unique": "China" }, { "title": "Synthesized Policies for Transfer and Adaptation across Tasks and Environments", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11135", "id": "11135", "author_site": "Hexiang Hu, Liyu Chen, Boqing Gong, Fei Sha", "author": "Hexiang Hu; Liyu Chen; Boqing Gong; Fei Sha", "abstract": "The ability to transfer in reinforcement learning is key to building an agent of general artificial intelligence. In this paper, we consider the problem of learning to simultaneously transfer across both environments and tasks and, perhaps more importantly, to do so by learning from only sparse (environment, task) pairs out of all the possible combinations. We propose a novel compositional neural network architecture which depicts a meta rule for composing policies from environment and task embeddings. Notably, one of the main challenges is to learn the embeddings jointly with the meta rule. 
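For "Synthesized Policies for Transfer and Adaptation across Tasks and Environments" above, one way to read "a meta rule for composing policies from environment and task embeddings" is a bilinear map from the pair of embeddings to policy weights. The sketch below is only that reading, with all dimensions, the outer-product composition, and the softmax head assumed for illustration.

```python
import numpy as np

rng = np.random.default_rng(0)
d_env, d_task, d_state, n_actions = 8, 8, 16, 4

U = rng.normal(size=(d_env * d_task, d_state * n_actions)) * 0.1  # meta-rule parameters
e_env = rng.normal(size=d_env)     # environment embedding (learned jointly in the paper)
e_task = rng.normal(size=d_task)   # task embedding (learned jointly in the paper)

# Compose per-(environment, task) policy weights from the embedding outer product.
W_pi = (np.outer(e_env, e_task).ravel() @ U).reshape(d_state, n_actions)

def policy(state):
    logits = state @ W_pi
    p = np.exp(logits - logits.max())
    return p / p.sum()

print(policy(rng.normal(size=d_state)))  # action distribution for one state
```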
We further propose new training methods to disentangle the embeddings, making them both distinctive signatures of the environments and tasks and effective building blocks for composing the policies. Experiments on GridWorld and THOR, in which the agent takes an egocentric view as input, show that our approach gives rise to high success rates on all the (environment, task) pairs after learning from only 40% of them.", "bibtex": "@inproceedings{NEURIPS2018_00ac8ed3,\n author = {Hu, Hexiang and Chen, Liyu and Gong, Boqing and Sha, Fei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Synthesized Policies for Transfer and Adaptation across Tasks and Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/00ac8ed3b4327bdd4ebbebcb2ba10a00-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/00ac8ed3b4327bdd4ebbebcb2ba10a00-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/00ac8ed3b4327bdd4ebbebcb2ba10a00-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/00ac8ed3b4327bdd4ebbebcb2ba10a00-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/00ac8ed3b4327bdd4ebbebcb2ba10a00-Reviews.html", "metareview": "", "pdf_size": 1939426, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11364844526533548468&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/00ac8ed3b4327bdd4ebbebcb2ba10a00-Abstract.html" }, { "title": "TADAM: Task dependent adaptive metric for improved few-shot learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11094", "id": "11094", "author_site": "Boris Oreshkin, Pau Rodr\u00edguez L\u00f3pez, Alexandre Lacoste", "author": "Boris Oreshkin; Pau Rodr\u00edguez L\u00f3pez; Alexandre Lacoste", "abstract": "Few-shot learning has become essential for producing models that generalize from few examples. In this work, we identify that metric scaling and metric task conditioning are important to improve the performance of few-shot algorithms. Our analysis reveals that simple metric scaling completely changes the nature of few-shot algorithm parameter updates. Metric scaling provides improvements up to 14% in accuracy for certain metrics on the mini-Imagenet 5-way 5-shot classification task. We further propose a simple and effective way of conditioning a learner on the task sample set, resulting in learning a task-dependent metric space. Moreover, we propose and empirically test a practical end-to-end optimization procedure based on auxiliary task co-training to learn a task-dependent metric space. The resulting few-shot learning model based on the task-dependent scaled metric achieves state of the art on mini-Imagenet. We confirm these results on another few-shot dataset that we introduce in this paper based on CIFAR100.", "bibtex": "@inproceedings{NEURIPS2018_66808e32,\n author = {Oreshkin, Boris and Rodr\\'{\\i}guez L\\'{o}pez, Pau and Lacoste, Alexandre},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {TADAM: Task dependent adaptive metric for improved few-shot learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/66808e327dc79d135ba18e051673d906-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/66808e327dc79d135ba18e051673d906-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/66808e327dc79d135ba18e051673d906-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/66808e327dc79d135ba18e051673d906-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/66808e327dc79d135ba18e051673d906-Reviews.html", "metareview": "", "pdf_size": 816265, "gs_citation": 1703, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15025574335418226526&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Element AI; Element AI+ CVC-UAB; Element AI", "aff_domain": "elementai.com;elementai.com;elementai.com", "email": "elementai.com;elementai.com;elementai.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/66808e327dc79d135ba18e051673d906-Abstract.html", "aff_unique_index": "0;0+1;0", "aff_unique_norm": "Element AI;Universitat Aut\u00f2noma de Barcelona", "aff_unique_dep": ";Computer Vision Center", "aff_unique_url": "https://www.elementai.com;https://www.cvc.uab.cat/", "aff_unique_abbr": "Element AI;CVC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;0", "aff_country_unique": "Canada;Spain" }, { "title": "TETRIS: TilE-matching the TRemendous Irregular Sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11408", "id": "11408", "author_site": "Yu Ji, Ling Liang, Lei Deng, Youyang Zhang, Youhui Zhang, Yuan Xie", "author": "Yu Ji; Ling Liang; Lei Deng; Youyang Zhang; Youhui Zhang; Yuan Xie", "abstract": "Compressing neural networks by pruning weights with small magnitudes can significantly reduce the computation and storage cost. Although pruning makes the model smaller, it is difficult to get practical speedups on modern computing platforms such as CPUs and GPUs due to the irregularity. Structural pruning has attracted a lot of research interest as a way to make sparsity hardware-friendly. Increasing the sparsity granularity can lead to better hardware utilization, but it will compromise the sparsity needed to maintain accuracy.", "bibtex": "@inproceedings{NEURIPS2018_89885ff2,\n author = {Ji, Yu and Liang, Ling and Deng, Lei and Zhang, Youyang and Zhang, Youhui and Xie, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {TETRIS: TilE-matching the TRemendous Irregular Sparsity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/89885ff2c83a10305ee08bd507c1049c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/89885ff2c83a10305ee08bd507c1049c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/89885ff2c83a10305ee08bd507c1049c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/89885ff2c83a10305ee08bd507c1049c-Reviews.html", "metareview": "", "pdf_size": 419575, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=864596701012212472&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science and Technology, Tsinghua University + Beijing Innovation Center for Future Chip; Department of Electrical and Computer Engineering, University of California, Santa Barbara; Department of Electrical and Computer Engineering, University of California, Santa Barbara; Department of Computer Science and Technology, Tsinghua University; Department of Computer Science and Technology, Tsinghua University + Beijing Innovation Center for Future Chip; Department of Electrical and Computer Engineering, University of California, Santa Barbara", "aff_domain": "mails.tsinghua.edu.cn;ece.ucsb.edu;ece.ucsb.edu;mails.tsinghua.edu.cn;tsinghua.edu.cn;ece.ucsb.edu", "email": "mails.tsinghua.edu.cn;ece.ucsb.edu;ece.ucsb.edu;mails.tsinghua.edu.cn;tsinghua.edu.cn;ece.ucsb.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/89885ff2c83a10305ee08bd507c1049c-Abstract.html", "aff_unique_index": "0+1;2;2;0;0+1;2", "aff_unique_norm": "Tsinghua University;Beijing Innovation Center for Future Chip;University of California, Santa Barbara", "aff_unique_dep": "Department of Computer Science and Technology;;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.tsinghua.edu.cn;;https://www.ucsb.edu", "aff_unique_abbr": "THU;;UCSB", "aff_campus_unique_index": ";1;1;;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0+0;1;1;0;0+0;1", "aff_country_unique": "China;United States" }, { "title": "Tangent: Automatic differentiation using source-code transformation for dynamically typed array programming", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11606", "id": "11606", "author_site": "Bart van Merri\u00ebnboer, Dan Moldovan, Alexander Wiltschko", "author": "Bart van Merrienboer; Dan Moldovan; Alexander Wiltschko", "abstract": "The need to efficiently calculate first- and higher-order derivatives of increasingly complex models expressed in Python has stressed or exceeded the capabilities of available tools. In this work, we explore techniques from the field of automatic differentiation (AD) that can give researchers expressive power, performance and strong usability. These include source-code transformation (SCT), flexible gradient surgery, efficient in-place array operations, and higher-order derivatives. 
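The Tangent library named in the next sentence demonstrates SCT-based AD with a one-call API; a minimal usage sketch follows. The grad function mirrors the project README at https://github.com/google/tangent, but treat the exact signature as an assumption.

```python
# Hedged usage sketch of source-code-transformation AD with Tangent.
# `tangent.grad` returns a *new Python function* whose derivative code was
# generated ahead of time by transforming f's source, rather than by tracing.
import tangent

def f(x):
    return x * x * x

df = tangent.grad(f)   # source-to-source transformation happens here
print(df(2.0))         # 12.0, i.e. the derivative 3 * x**2 evaluated at x = 2
```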
We implement and demonstrate these ideas in the Tangent software library for Python, the first AD framework for a dynamic language that uses SCT.", "bibtex": "@inproceedings{NEURIPS2018_748d6b6e,\n author = {van Merrienboer, Bart and Moldovan, Dan and Wiltschko, Alexander},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tangent: Automatic differentiation using source-code transformation for dynamically typed array programming},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/748d6b6ed8e13f857ceaa6cfbdca14b8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/748d6b6ed8e13f857ceaa6cfbdca14b8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/748d6b6ed8e13f857ceaa6cfbdca14b8-Metadata.json", "review": "", "metareview": "", "pdf_size": 273355, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3543127583608451683&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "MILA, Google Brain; Google Brain; Google Brain", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "https://github.com/google/tangent", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/748d6b6ed8e13f857ceaa6cfbdca14b8-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Mila;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://mila.quebec;https://brain.google.com", "aff_unique_abbr": "MILA;Google Brain", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "title": "Task-Driven Convolutional Recurrent Models of the Visual System", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11517", "id": "11517", "author_site": "Aran Nayebi, Daniel Bear, Jonas Kubilius, Kohitij Kar, Surya Ganguli, David Sussillo, James J DiCarlo, Daniel Yamins", "author": "Aran Nayebi; Daniel Bear; Jonas Kubilius; Kohitij Kar; Surya Ganguli; David Sussillo; James J DiCarlo; Daniel L Yamins", "abstract": "Feed-forward convolutional neural networks (CNNs) are currently state-of-the-art for object classification tasks such as ImageNet. Further, they are quantitatively accurate models of temporally-averaged responses of neurons in the primate brain's visual system. However, biological visual systems have two ubiquitous architectural features not shared with typical CNNs: local recurrence within cortical areas, and long-range feedback from downstream areas to upstream areas. Here we explored the role of recurrence in improving classification performance. We found that standard forms of recurrence (vanilla RNNs and LSTMs) do not perform well within deep CNNs on the ImageNet task. In contrast, novel cells that incorporated two structural features, bypassing and gating, were able to boost task accuracy substantially. We extended these design principles in an automated search over thousands of model architectures, which identified novel local recurrent cells and long-range feedback connections useful for object recognition. 
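A hedged numpy sketch of the two structural features credited above in "Task-Driven Convolutional Recurrent Models of the Visual System", bypassing and gating, combined in a single recurrent cell. 1x1 convolutions are collapsed to matrix multiplies for brevity, and every detail here is an illustrative assumption rather than the authors' searched cell.

```python
import numpy as np

rng = np.random.default_rng(0)
d = 32
W_in, W_rec, W_gate = (rng.normal(size=(d, d)) * 0.1 for _ in range(3))

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cell_step(x, h):
    g = sigmoid(x @ W_gate + h @ W_gate)     # gate computed from input and state
    update = np.tanh(x @ W_in + h @ W_rec)   # candidate recurrent update
    h_new = g * update + (1 - g) * h         # gating: multiplicative state change
    return h_new + x                         # bypassing: input skips the recurrence

h = np.zeros(d)
for _ in range(5):                           # unroll a few time steps
    h = cell_step(rng.normal(size=d), h)
print(h.shape)
```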
Moreover, these task-optimized ConvRNNs matched the dynamics of neural activity in the primate visual system better than feedforward networks, suggesting a role for the brain's recurrent connections in performing difficult visual behaviors.", "bibtex": "@inproceedings{NEURIPS2018_6be93f7a,\n author = {Nayebi, Aran and Bear, Daniel and Kubilius, Jonas and Kar, Kohitij and Ganguli, Surya and Sussillo, David and DiCarlo, James J and Yamins, Daniel L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Task-Driven Convolutional Recurrent Models of the Visual System},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6be93f7a96fed60c477d30ae1de032fd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6be93f7a96fed60c477d30ae1de032fd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6be93f7a96fed60c477d30ae1de032fd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6be93f7a96fed60c477d30ae1de032fd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6be93f7a96fed60c477d30ae1de032fd-Reviews.html", "metareview": "", "pdf_size": 2045925, "gs_citation": 195, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11039722383223148947&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Neurosciences PhD Program, Stanford University; Department of Psychology, Stanford University; Brain and Cognition, KU Leuven + McGovern Institute for Brain Research, MIT; McGovern Institute for Brain Research, MIT; Department of Applied Physics, Stanford University + Google Brain, Google, Inc.; Google Brain, Google, Inc.; Department of Brain and Cognitive Sciences, MIT + McGovern Institute for Brain Research, MIT; Department of Computer Science, Stanford University + Wu Tsai Neurosciences Institute, Stanford", "aff_domain": "stanford.edu;stanford.edu;mit.edu; ; ; ; ; ", "email": "stanford.edu;stanford.edu;mit.edu; ; ; ; ; ", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6be93f7a96fed60c477d30ae1de032fd-Abstract.html", "aff_unique_index": "0;0;1+2;2;0+3;3;2+2;0+0", "aff_unique_norm": "Stanford University;KU Leuven;Massachusetts Institute of Technology;Google", "aff_unique_dep": "Neurosciences PhD Program;Brain and Cognition;McGovern Institute for Brain Research;Google Brain", "aff_unique_url": "https://www.stanford.edu;https://www.kuleuven.be;https://www.mit.edu;https://www.google.com", "aff_unique_abbr": "Stanford;KU Leuven;MIT;Google", "aff_campus_unique_index": "0;0;2;2;0+3;3;2+2;0+0", "aff_campus_unique": "Stanford;;Cambridge;Mountain View", "aff_country_unique_index": "0;0;1+0;0;0+0;0;0+0;0+0", "aff_country_unique": "United States;Belgium" }, { "title": "Teaching Inverse Reinforcement Learners via Features and Demonstrations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11809", "id": "11809", "author_site": "Luis Haug, Sebastian Tschiatschek, Adish Singla", "author": "Luis Haug; Sebastian Tschiatschek; Adish Singla", "abstract": "Learning near-optimal behaviour from an expert's demonstrations typically relies on the assumption that the learner knows the features that the true reward function depends on. 
In this paper, we study the problem of learning from demonstrations in the setting where this is not the case, i.e., where there is a mismatch between the worldviews of the learner and the expert. We introduce a natural quantity, the teaching risk, which measures the potential suboptimality of policies that look optimal to the learner in this setting. We show that bounds on the teaching risk guarantee that the learner is able to find a near-optimal policy using standard algorithms based on inverse reinforcement learning. Based on these findings, we suggest a teaching scheme in which the expert can decrease the teaching risk by updating the learner's worldview, and thus ultimately enable her to find a near-optimal policy.", "bibtex": "@inproceedings{NEURIPS2018_4928e751,\n author = {Haug, Luis and Tschiatschek, Sebastian and Singla, Adish},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Teaching Inverse Reinforcement Learners via Features and Demonstrations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4928e7510f45da6575b04a28519c09ed-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4928e7510f45da6575b04a28519c09ed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4928e7510f45da6575b04a28519c09ed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4928e7510f45da6575b04a28519c09ed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4928e7510f45da6575b04a28519c09ed-Reviews.html", "metareview": "", "pdf_size": 554025, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13014177803520317788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, ETH Zurich; Microsoft Research, Cambridge, UK; Max Planck Institute for Software Systems, Saarbr\u00fccken, Germany", "aff_domain": "inf.ethz.ch;microsoft.com;mpi-sws.org", "email": "inf.ethz.ch;microsoft.com;mpi-sws.org", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4928e7510f45da6575b04a28519c09ed-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;Microsoft;Max Planck Institute for Software Systems", "aff_unique_dep": "Department of Computer Science;Microsoft Research;", "aff_unique_url": "https://www.ethz.ch;https://www.microsoft.com/en-us/research;https://www.mpi-sws.org", "aff_unique_abbr": "ETHZ;MSR;MPI-SWS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Saarbr\u00fccken", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;United Kingdom;Germany" }, { "title": "Temporal Regularization for Markov Decision Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11191", "id": "11191", "author_site": "Pierre Thodoroff, Audrey Durand, Joelle Pineau, Doina Precup", "author": "Pierre Thodoroff; Audrey Durand; Joelle Pineau; Doina Precup", "abstract": "Several applications of Reinforcement Learning suffer from instability due to high variance. This is especially prevalent in high dimensional domains. Regularization is a commonly used technique in machine learning to reduce variance, at the cost of introducing some bias. 
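For "Teaching Inverse Reinforcement Learners via Features and Demonstrations" above, a simple linear-feature proxy makes the worldview-mismatch story concrete: measure how much of the true reward direction is invisible to the learner's feature map, then teach by revealing the missing direction. This proxy is an assumption for illustration, not the paper's exact teaching-risk definition.

```python
import numpy as np
import numpy.linalg as la

rng = np.random.default_rng(0)
d = 6
w_star = rng.normal(size=d); w_star /= la.norm(w_star)   # true reward direction
A = rng.normal(size=(3, d))                              # learner's worldview: sees A @ phi(s)

def teaching_risk_proxy(A, w_star):
    P = A.T @ la.pinv(A.T)               # projector onto the row space of A
    return la.norm(w_star - P @ w_star)  # reward mass invisible to the learner

print(teaching_risk_proxy(A, w_star))
# Teaching step: update the learner's worldview with the missing direction.
residual = w_star - (A.T @ la.pinv(A.T)) @ w_star
A2 = np.vstack([A, residual / la.norm(residual)])
print(teaching_risk_proxy(A2, w_star))   # ~0 after the worldview update
```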
Most existing regularization techniques focus on spatial (perceptual) regularization. Yet in reinforcement learning, due to the nature of the Bellman equation, there is an opportunity to also exploit temporal regularization based on smoothness in value estimates over trajectories. This paper explores a class of methods for temporal regularization. We formally characterize the bias induced by this technique using Markov chain concepts. We illustrate the various characteristics of temporal regularization via a sequence of simple discrete and continuous MDPs, and show that the technique provides improvement even in high-dimensional Atari games.", "bibtex": "@inproceedings{NEURIPS2018_4b025079,\n author = {Thodoroff, Pierre and Durand, Audrey and Pineau, Joelle and Precup, Doina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Temporal Regularization for Markov Decision Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4b0250793549726d5c1ea3906726ebfe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4b0250793549726d5c1ea3906726ebfe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4b0250793549726d5c1ea3906726ebfe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4b0250793549726d5c1ea3906726ebfe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4b0250793549726d5c1ea3906726ebfe-Reviews.html", "metareview": "", "pdf_size": 491384, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12308924458627658967&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "McGill University; McGill University; McGill University + Facebook AI Research; McGill University", "aff_domain": "mail.mcgill.ca;mcgill.ca;cs.mcgill.ca;cs.mcgill.ca", "email": "mail.mcgill.ca;mcgill.ca;cs.mcgill.ca;cs.mcgill.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4b0250793549726d5c1ea3906726ebfe-Abstract.html", "aff_unique_index": "0;0;0+1;0", "aff_unique_norm": "McGill University;Meta", "aff_unique_dep": ";Facebook AI Research", "aff_unique_url": "https://www.mcgill.ca;https://research.facebook.com", "aff_unique_abbr": "McGill;FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1;0", "aff_country_unique": "Canada;United States" }, { "title": "Temporal alignment and latent Gaussian process factor inference in population spike trains", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11988", "id": "11988", "author_site": "Lea Duncker, Maneesh Sahani", "author": "Lea Duncker; Maneesh Sahani", "abstract": "We introduce a novel scalable approach to identifying common latent structure in neural population spike-trains, which allows for variability both in the trajectory and in the rate of progression of the underlying computation. Our approach is based on shared latent Gaussian processes (GPs) which are combined linearly, as in the Gaussian Process Factor Analysis (GPFA) algorithm. We extend GPFA to handle unbinned spike-train data by incorporating a continuous time point-process likelihood model, achieving scalability with a sparse variational approximation. 
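A hedged sketch of the idea in "Temporal Regularization for Markov Decision Process" above: smooth the TD bootstrap target with the value of the previous state along the trajectory. The specific beta-mixed target below is an illustrative reading of the abstract, not a verified transcription of the paper's estimator.

```python
import numpy as np

rng = np.random.default_rng(0)
n_states, gamma, alpha, beta = 5, 0.9, 0.1, 0.2
V = np.zeros(n_states)

def td_step(s_prev, s, r, s_next):
    # Standard TD(0) target:        r + gamma * V[s_next]
    # Temporally regularized:       mix V[s_next] with V[s_prev] along the trajectory,
    # trading a little bias for reduced variance in the value estimate.
    target = r + gamma * ((1 - beta) * V[s_next] + beta * V[s_prev])
    V[s] += alpha * (target - V[s])

# Toy trajectory with reward 1 on reaching the last state.
traj = [0, 1, 2, 1, 2, 3, 4]
for t in range(1, len(traj) - 1):
    r = 1.0 if traj[t + 1] == n_states - 1 else 0.0
    td_step(traj[t - 1], traj[t], r, traj[t + 1])
print(V)
```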
Shared variability is separated into terms that express condition dependence, as well as trial-to-trial variation in trajectories. Finally, we introduce a nested GP formulation to capture variability in the rate of evolution along the trajectory. We show that the new method learns to recover latent trajectories in synthetic data, and can accurately identify the trial-to-trial timing of movement-related parameters from motor cortical data without any supervision.", "bibtex": "@inproceedings{NEURIPS2018_d1ff1ec8,\n author = {Duncker, Lea and Sahani, Maneesh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Temporal alignment and latent Gaussian process factor inference in population spike trains},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d1ff1ec86b62cd5f3903ff19c3a326b2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d1ff1ec86b62cd5f3903ff19c3a326b2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d1ff1ec86b62cd5f3903ff19c3a326b2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d1ff1ec86b62cd5f3903ff19c3a326b2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d1ff1ec86b62cd5f3903ff19c3a326b2-Reviews.html", "metareview": "", "pdf_size": 1502806, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14654811377240859655&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Gatsby Computational Neuroscience Unit, University College London; Gatsby Computational Neuroscience Unit, University College London", "aff_domain": "gatsby.ucl.ac.uk;gatsby.ucl.ac.uk", "email": "gatsby.ucl.ac.uk;gatsby.ucl.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d1ff1ec86b62cd5f3903ff19c3a326b2-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "Gatsby Computational Neuroscience Unit", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Testing for Families of Distributions via the Fourier Transform", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11954", "id": "11954", "author_site": "Alistair Stewart, Ilias Diakonikolas, Cl\u00e9ment L Canonne", "author": "Cl\u00e9ment L Canonne; Ilias Diakonikolas; Alistair Stewart", "abstract": "We study the general problem of testing whether an unknown discrete distribution belongs to a specified family of distributions. More specifically, given a distribution family P and sample access to an unknown discrete distribution D , we want to distinguish (with high probability) between the case that D is in P and the case that D is \u03b5-far, in total variation distance, from every distribution in P . This is the prototypical hypothesis testing problem that has received significant attention in statistics and, more recently, in computer science. The main contribution of this work is a simple and general testing technique that is applicable to all distribution families whose Fourier spectrum satisfies a certain approximate sparsity property. 
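The "approximate sparsity" hypothesis in "Testing for Families of Distributions via the Fourier Transform" above can be eyeballed numerically: empirical distributions from such families should concentrate almost all of their Fourier energy on a few coefficients. The check below is an illustrative proxy for that property, not the paper's tester.

```python
import numpy as np

rng = np.random.default_rng(0)
n = 64

# A Poisson-Binomial-like distribution: sum of 12 independent Bernoullis
# with random parameters, estimated from 20000 samples.
samples = (rng.random((20000, 12)) < rng.random(12)).sum(axis=1)
pmf = np.bincount(samples, minlength=n) / len(samples)

spec = np.abs(np.fft.fft(pmf)) ** 2             # Fourier energy per frequency
top_k = np.sort(spec)[::-1][:8].sum()           # energy in the 8 largest coefficients
print(f"energy in top 8 of {n} coefficients: {top_k / spec.sum():.4f}")  # close to 1
```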
We apply our Fourier-based framework to obtain near sample-optimal and computationally efficient testers for the following fundamental distribution families: Sums of Independent Integer Random Variables (SIIRVs), Poisson Multinomial Distributions (PMDs), and Discrete Log-Concave Distributions. For the first two, ours are the first non-trivial testers in the literature, vastly generalizing previous work on testing Poisson Binomial Distributions. For the third, our tester improves on prior work in both sample and time complexity.", "bibtex": "@inproceedings{NEURIPS2018_aa8fdbb7,\n author = {Canonne, Cl\\'{e}ment L and Diakonikolas, Ilias and Stewart, Alistair},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Testing for Families of Distributions via the Fourier Transform},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aa8fdbb7d8159b3048daca36fe5c06d2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aa8fdbb7d8159b3048daca36fe5c06d2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aa8fdbb7d8159b3048daca36fe5c06d2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aa8fdbb7d8159b3048daca36fe5c06d2-Reviews.html", "metareview": "", "pdf_size": 349650, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4097894176100522129&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Stanford University; University of Southern California; University of Southern California", "aff_domain": "stanford.edu;usc.edu;gmail.com", "email": "stanford.edu;usc.edu;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aa8fdbb7d8159b3048daca36fe5c06d2-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.usc.edu", "aff_unique_abbr": "Stanford;USC", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Text-Adaptive Generative Adversarial Networks: Manipulating Images with Natural Language", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11032", "id": "11032", "author_site": "Seonghyeon Nam, Yunji Kim, Seon Joo Kim", "author": "Seonghyeon Nam; Yunji Kim; Seon Joo Kim", "abstract": "This paper addresses the problem of manipulating images using natural language description. Our task aims to semantically modify visual attributes of an object in an image according to the text describing the new visual appearance. Although existing methods synthesize images having new attributes, they do not fully preserve text-irrelevant contents of the original image. In this paper, we propose the text-adaptive generative adversarial network (TAGAN) to generate semantically manipulated images while preserving text-irrelevant contents. The key to our method is the text-adaptive discriminator that creates word level local discriminators according to input text to classify fine-grained attributes independently. 
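A hedged sketch of the word-level local discriminators described above for "Text-Adaptive Generative Adversarial Networks": each caption word induces its own 1x1 discriminator over image features, and the per-word scores are combined with attention over words. Shapes, the sigmoid scoring, and the attention form are all assumptions for illustration.

```python
import numpy as np

rng = np.random.default_rng(0)
T, d_word, d_img, H, W = 5, 16, 32, 8, 8
words = rng.normal(size=(T, d_word))           # word embeddings of the caption
feat = rng.normal(size=(H * W, d_img))         # image feature map, flattened

W_f = rng.normal(size=(d_word, d_img)) * 0.1   # maps a word to a local 1x1 filter
W_a = rng.normal(size=(d_word,)) * 0.1         # word-attention parameters

filters = words @ W_f                            # (T, d_img): one discriminator per word
scores = 1 / (1 + np.exp(-(feat @ filters.T)))   # (HW, T) per-location, per-word scores
attn = np.exp(words @ W_a); attn /= attn.sum()   # (T,) attention over caption words
print(float(scores.mean(axis=0) @ attn))         # aggregate word-conditioned score
```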
With this discriminator, the generator learns to generate images where only the regions that correspond to the given text are modified. Experimental results show that our method outperforms existing methods on CUB and Oxford-102 datasets, and our results were mostly preferred in a user study. Extensive analysis shows that our method is able to effectively disentangle visual attributes and produce pleasing outputs.", "bibtex": "@inproceedings{NEURIPS2018_d645920e,\n author = {Nam, Seonghyeon and Kim, Yunji and Kim, Seon Joo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Text-Adaptive Generative Adversarial Networks: Manipulating Images with Natural Language},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d645920e395fedad7bbbed0eca3fe2e0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d645920e395fedad7bbbed0eca3fe2e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d645920e395fedad7bbbed0eca3fe2e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d645920e395fedad7bbbed0eca3fe2e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d645920e395fedad7bbbed0eca3fe2e0-Reviews.html", "metareview": "", "pdf_size": 2683926, "gs_citation": 263, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16297852187992554128&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Yonsei University; Yonsei University; Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "email": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d645920e395fedad7bbbed0eca3fe2e0-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "title": "The Cluster Description Problem - Complexity Results, Formulations and Approximations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11600", "id": "11600", "author_site": "Ian Davidson, Antoine Gourru, S. S. Ravi", "author": "Ian Davidson; Antoine Gourru; S Ravi", "abstract": "Consider the situation where you are given an existing $k$-way clustering $\pi$. A challenge for explainable AI is to find compact and distinct explanations of each cluster, which in this paper means using instance-level descriptors/tags from a common dictionary. Since the descriptors/tags were not given to the clustering method, this is not a semi-supervised learning situation. We show that the \emph{feasibility} problem of just testing whether any distinct description (not the most compact) exists is generally intractable for just two clusters. This means that unless \textbf{P} = \textbf{NP}, there cannot exist an efficient algorithm for the cluster description problem. Hence, we explore ILP formulations for smaller problems and a relaxed but restricted setting that leads to a polynomial time algorithm for larger problems. 
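For "The Cluster Description Problem" above, a brute-force feasibility check makes the object of study concrete on toy data: look for disjoint tag sets, one per cluster, that cover every instance in their cluster. The "at least one tag per instance" covering rule is an illustrative assumption; the intractability result quoted above is exactly why anything beyond tiny instances needs the ILP or relaxed formulations.

```python
from itertools import combinations

def feasible(instances, labels, max_tags=2):
    """Search for two disjoint tag sets of size `max_tags`, one per cluster,
    such that every instance carries at least one tag from its cluster's set."""
    tags = sorted({t for inst in instances for t in inst})
    for d0 in combinations(tags, max_tags):
        for d1 in combinations([t for t in tags if t not in d0], max_tags):
            ok0 = all(set(inst) & set(d0) for i, inst in enumerate(instances) if labels[i] == 0)
            ok1 = all(set(inst) & set(d1) for i, inst in enumerate(instances) if labels[i] == 1)
            if ok0 and ok1:
                return d0, d1
    return None

instances = [{"a", "b"}, {"a"}, {"c"}, {"c", "d"}]   # tag sets per instance
labels = [0, 0, 1, 1]                                 # a given 2-way clustering
print(feasible(instances, labels))                    # (('a', 'b'), ('c', 'd'))
```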
We explore several extensions to the basic setting, such as the ability to ignore some instances and composition constraints on the descriptions of the clusters. We show our formulation's usefulness on Twitter data where the communities were found using social connectivity (i.e. \texttt{follower} relation) but the explanation of the communities is based on behavioral properties of the nodes (i.e. hashtag usage) not available to the clustering method.", "bibtex": "@inproceedings{NEURIPS2018_3fd60983,\n author = {Davidson, Ian and Gourru, Antoine and Ravi, S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Cluster Description Problem - Complexity Results, Formulations and Approximations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3fd60983292458bf7dee75f12d5e9e05-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3fd60983292458bf7dee75f12d5e9e05-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3fd60983292458bf7dee75f12d5e9e05-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3fd60983292458bf7dee75f12d5e9e05-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3fd60983292458bf7dee75f12d5e9e05-Reviews.html", "metareview": "", "pdf_size": 3147245, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14003486369918831907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of California - Davis + Institute of Advanced Studies Fellow 2017-2018 at Collegium de Lyon; Universite de Lyon (ERIC, Lyon 2); Biocomplexity Institute, University of Virginia + Dept. of Computer Science, University at Albany \u2013 State University of New York", "aff_domain": "cs.ucdavis.edu;univ-lyon2.fr;gmail.com", "email": "cs.ucdavis.edu;univ-lyon2.fr;gmail.com", "github": "", "project": "www.cs.ucdavis.edu/~davidson/description-clustering", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3fd60983292458bf7dee75f12d5e9e05-Abstract.html", "aff_unique_index": "0+1;2;3+4", "aff_unique_norm": "University of California, Davis;Collegium de Lyon;Universite de Lyon;University of Virginia;University at Albany \u2013 State University of New York", "aff_unique_dep": "Department of Computer Science;Institute of Advanced Studies;ERIC;Biocomplexity Institute;Dept. 
of Computer Science", "aff_unique_url": "https://www.ucdavis.edu;https://www.collegiumdelyon.org;https://www.universite-lyon.fr;https://www.virginia.edu;https://www.albany.edu", "aff_unique_abbr": "UC Davis;;UDL;UVA;UAlbany", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Davis;;Lyon;Albany", "aff_country_unique_index": "0+1;1;0+0", "aff_country_unique": "United States;France" }, { "title": "The Convergence of Sparsified Gradient Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11580", "id": "11580", "author_site": "Dan Alistarh, Torsten Hoefler, Mikael Johansson, Nikola Konstantinov, Sarit Khirirat, Cedric Renggli", "author": "Dan Alistarh; Torsten Hoefler; Mikael Johansson; Nikola Konstantinov; Sarit Khirirat; Cedric Renggli", "abstract": "Distributed training of massive machine learning models, in particular deep neural networks, via Stochastic Gradient Descent (SGD) is becoming commonplace. Several families of communication-reduction methods, such as quantization, large-batch methods, and gradient sparsification, have been proposed. To date, gradient sparsification methods--where each node sorts gradients by magnitude, and only communicates a subset of the components, accumulating the rest locally--are known to yield some of the largest practical gains. Such methods can reduce the amount of communication per step by up to \emph{three orders of magnitude}, while preserving model accuracy. Yet, this family of methods currently has no theoretical justification.", "bibtex": "@inproceedings{NEURIPS2018_31445061,\n author = {Alistarh, Dan and Hoefler, Torsten and Johansson, Mikael and Konstantinov, Nikola and Khirirat, Sarit and Renggli, Cedric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Convergence of Sparsified Gradient Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/314450613369e0ee72d0da7f6fee773c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/314450613369e0ee72d0da7f6fee773c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/314450613369e0ee72d0da7f6fee773c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/314450613369e0ee72d0da7f6fee773c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/314450613369e0ee72d0da7f6fee773c-Reviews.html", "metareview": "", "pdf_size": 614279, "gs_citation": 640, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16113548723344874874&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 27, "aff": "IST Austria; ETH Zurich; KTH; KTH; IST Austria; ETH Zurich", "aff_domain": "ist.ac.at;inf.ethz.ch;kth.se;kth.se;ist.ac.at;inf.ethz.ch", "email": "ist.ac.at;inf.ethz.ch;kth.se;kth.se;ist.ac.at;inf.ethz.ch", "github": "", "project": "https://arxiv.org/abs/1809.10505", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/314450613369e0ee72d0da7f6fee773c-Abstract.html", "aff_unique_index": "0;1;2;2;0;1", "aff_unique_norm": "Institute of Science and Technology Austria;ETH Zurich;KTH Royal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ist.ac.at;https://www.ethz.ch;https://www.kth.se", "aff_unique_abbr": "IST Austria;ETHZ;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0;1", "aff_country_unique": "Austria;Switzerland;Sweden" }, { "title": "The Description Length of Deep Learning models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11232", "id": "11232", "author_site": "L\u00e9onard Blier, Yann Ollivier", "author": "L\u00e9onard Blier; Yann Ollivier", "abstract": "Deep learning models often have more parameters than observations, and still perform well. This is sometimes described as a paradox. In this work, we show experimentally that despite their huge number of parameters, deep neural networks can compress the data losslessly even when taking the cost of encoding the parameters into account. Such a compression viewpoint originally motivated the use of variational methods in neural networks. However, we show that these variational methods provide surprisingly poor compression bounds, despite being explicitly built to minimize such bounds. This might explain the relatively poor practical performance of variational methods in deep learning. Better encoding methods, imported from the Minimum Description Length (MDL) toolbox, yield much better compression values on deep networks.", "bibtex": "@inproceedings{NEURIPS2018_3b712de4,\n author = {Blier, L\\'{e}onard and Ollivier, Yann},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Description Length of Deep Learning models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3b712de48137572f3849aabd5666a4e3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3b712de48137572f3849aabd5666a4e3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3b712de48137572f3849aabd5666a4e3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3b712de48137572f3849aabd5666a4e3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3b712de48137572f3849aabd5666a4e3-Reviews.html", "metareview": "", "pdf_size": 370790, "gs_citation": 117, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1129581109217824919&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "\u00c9cole Normale Sup\u00e9rieure, Paris, France; Facebook Artificial Intelligence Research, Paris, France", "aff_domain": "normalesup.org;fb.com", "email": "normalesup.org;fb.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3b712de48137572f3849aabd5666a4e3-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "\u00c9cole Normale Sup\u00e9rieure;Meta", "aff_unique_dep": ";Artificial Intelligence Research", "aff_unique_url": "https://www.ens.fr;https://research.facebook.com", "aff_unique_abbr": "ENS;FAIR", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "The Effect of Network Width on the Performance of Large-batch Training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11886", "id": "11886", "author_site": "Lingjiao Chen, Hongyi Wang, Jinman Zhao, Dimitris Papailiopoulos, Paraschos Koutris", "author": "Lingjiao Chen; Hongyi Wang; Jinman Zhao; Dimitris Papailiopoulos; Paraschos Koutris", "abstract": "Distributed implementations of mini-batch stochastic gradient descent (SGD) suffer from communication overheads, attributed to the high frequency of gradient updates inherent in small-batch training. Training with large batches can reduce these overheads; however, it besets the convergence of the algorithm and the generalization performance.", "bibtex": "@inproceedings{NEURIPS2018_e7c573c1,\n author = {Chen, Lingjiao and Wang, Hongyi and Zhao, Jinman and Papailiopoulos, Dimitris and Koutris, Paraschos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Effect of Network Width on the Performance of Large-batch Training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e7c573c14a09b84f6b7782ce3965f335-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e7c573c14a09b84f6b7782ce3965f335-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e7c573c14a09b84f6b7782ce3965f335-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e7c573c14a09b84f6b7782ce3965f335-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e7c573c14a09b84f6b7782ce3965f335-Reviews.html", "metareview": "", "pdf_size": 786950, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7218156055131367452&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Computer Sciences; Department of Computer Sciences; Department of Computer Sciences; Department of Computer Sciences+Department of Electrical and Computer Engineering; Department of Electrical and Computer Engineering", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e7c573c14a09b84f6b7782ce3965f335-Abstract.html", "aff_unique_index": "0;0;0;0+1;1", "aff_unique_norm": "University of Wisconsin-Madison;Unknown Institution", "aff_unique_dep": "Department of Computer Sciences;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.cs.wisc.edu;", "aff_unique_abbr": "UW-Madison;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "title": "The Everlasting Database: Statistical Validity at a Fair Price", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11631", "id": "11631", "author_site": "Blake Woodworth, Vitaly Feldman, Saharon Rosset, Nati Srebro", "author": "Blake E Woodworth; Vitaly Feldman; Saharon Rosset; Nati Srebro", "abstract": "The problem of handling adaptivity in data analysis, intentional or not, permeates\n a variety of fields, including test-set overfitting in ML challenges and the\n accumulation of invalid scientific discoveries.\n We propose a mechanism for answering an arbitrarily long sequence of\n potentially adaptive statistical queries, by charging a price for\n each query and using the proceeds to collect additional samples.\n Crucially, we guarantee statistical validity without any assumptions on\n how the queries are generated. We also ensure with high probability that\n the cost for $M$ non-adaptive queries is $O(\\log M)$,\n while the cost to a potentially adaptive user who makes $M$\n queries that do not depend on any others is $O(\\sqrt{M})$.", "bibtex": "@inproceedings{NEURIPS2018_4ad13f04,\n author = {Woodworth, Blake E and Feldman, Vitaly and Rosset, Saharon and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Everlasting Database: Statistical Validity at a Fair Price},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4ad13f04ef4373992c9d3046200aa350-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4ad13f04ef4373992c9d3046200aa350-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4ad13f04ef4373992c9d3046200aa350-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4ad13f04ef4373992c9d3046200aa350-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4ad13f04ef4373992c9d3046200aa350-Reviews.html", "metareview": "", "pdf_size": 374824, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8959500221877334717&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4ad13f04ef4373992c9d3046200aa350-Abstract.html" }, { "title": "The Global Anchor Method for Quantifying Linguistic Shifts and Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11896", "id": "11896", "author_site": "Zi Yin, Vin Sachidananda, Balaji Prabhakar", "author": "Zi Yin; Vin Sachidananda; Balaji Prabhakar", "abstract": "Language is dynamic, constantly evolving and adapting with respect to time, domain or topic. The adaptability of language is an active research area, where researchers discover social, cultural and domain-specific changes in language using distributional tools such as word embeddings. In this paper, we introduce the global anchor method for detecting corpus-level language shifts. We show both theoretically and empirically that the global anchor method is equivalent to the alignment method, a widely-used method for comparing word embeddings, in terms of detecting corpus-level language shifts. Despite their equivalence in terms of detection abilities, we demonstrate that the global anchor method is superior in terms of applicability as it can compare embeddings of different dimensionalities. Furthermore, the global anchor method has implementation and parallelization advantages. We show that the global anchor method reveals fine structures in the evolution of language and domain adaptation. When combined with the graph Laplacian technique, the global anchor method recovers the evolution trajectory and domain clustering of disparate text corpora.", "bibtex": "@inproceedings{NEURIPS2018_80b618eb,\n author = {Yin, Zi and Sachidananda, Vin and Prabhakar, Balaji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Global Anchor Method for Quantifying Linguistic Shifts and Domain Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/80b618ebcac7aa97a6dac2ba65cb7e36-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/80b618ebcac7aa97a6dac2ba65cb7e36-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/80b618ebcac7aa97a6dac2ba65cb7e36-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/80b618ebcac7aa97a6dac2ba65cb7e36-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/80b618ebcac7aa97a6dac2ba65cb7e36-Reviews.html", "metareview": "", "pdf_size": 519455, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2515434334360942828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering, Stanford University; Department of Electrical Engineering, Stanford University; Department of Electrical Engineering + Department of Computer Science, Stanford University", "aff_domain": "gmail.com;stanford.edu;stanford.edu", "email": "gmail.com;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/80b618ebcac7aa97a6dac2ba65cb7e36-Abstract.html", "aff_unique_index": "0;0;1+0", "aff_unique_norm": "Stanford University;Institution not specified", "aff_unique_dep": "Department of Electrical Engineering;Department of Electrical Engineering", "aff_unique_url": "https://www.stanford.edu;", "aff_unique_abbr": "Stanford;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "The Importance of Sampling in Meta-Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11884", "id": "11884", "author_site": "Bradly Stadie, Ge Yang, Rein Houthooft, Peter Chen, Yan Duan, Yuhuai Wu, Pieter Abbeel, Ilya Sutskever", "author": "Bradly Stadie; Ge Yang; Rein Houthooft; Peter Chen; Yan Duan; Yuhuai Wu; Pieter Abbeel; Ilya Sutskever", "abstract": "We interpret meta-reinforcement learning as the problem of learning how to quickly find a good sampling distribution in a new environment. This interpretation leads to the development of two new meta-reinforcement learning algorithms: E-MAML and E-$\\text{RL}^2$. Results are presented on a new environment we call `Krazy World': a difficult high-dimensional gridworld which is designed to highlight the importance of correctly differentiating through sampling distributions in meta-reinforcement learning. Further results are presented on a set of maze environments. We show E-MAML and E-$\\text{RL}^2$ deliver better performance than baseline algorithms on both tasks.", "bibtex": "@inproceedings{NEURIPS2018_d0f5722f,\n author = {Stadie, Bradly and Yang, Ge and Houthooft, Rein and Chen, Peter and Duan, Yan and Wu, Yuhuai and Abbeel, Pieter and Sutskever, Ilya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Importance of Sampling in Meta-Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d0f5722f11a0cc839fa2ca6ea49d8585-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d0f5722f11a0cc839fa2ca6ea49d8585-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d0f5722f11a0cc839fa2ca6ea49d8585-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d0f5722f11a0cc839fa2ca6ea49d8585-Reviews.html", "metareview": "", "pdf_size": 1740478, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15254673117622662176&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff": "UC Berkeley; University of Chicago; OpenAI; Covariant.ai; Covariant.ai; University of Toronto; UC Berkeley; OpenAI", "aff_domain": "berkeley.edu;berkeley.edu; ; ; ; ; ; ", "email": "berkeley.edu;berkeley.edu; ; ; ; ; ; ", "github": "https://github.com/episodeyang/e-maml", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d0f5722f11a0cc839fa2ca6ea49d8585-Abstract.html", "aff_unique_index": "0;1;2;3;3;4;0;2", "aff_unique_norm": "University of California, Berkeley;University of Chicago;OpenAI;Covariant.ai;University of Toronto", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.berkeley.edu;https://www.uchicago.edu;https://openai.com;https://www.covariant.ai;https://www.utoronto.ca", "aff_unique_abbr": "UC Berkeley;UChicago;OpenAI;Covariant.ai;U of T", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "The Limit Points of (Optimistic) Gradient Descent in Min-Max Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11880", "id": "11880", "author_site": "Constantinos Daskalakis, Ioannis Panageas", "author": "Constantinos Daskalakis; Ioannis Panageas", "abstract": "Motivated by applications in Optimization, Game Theory, and the training of Generative Adversarial Networks, the convergence properties of first order methods in min-max problems have received extensive study. It has been recognized that they may cycle, and there is no good understanding of their limit points when they do not. When they converge, do they converge to local min-max solutions? We characterize the limit points of two basic first order methods, namely Gradient Descent/Ascent (GDA) and Optimistic Gradient Descent Ascent (OGDA). We show that both dynamics avoid unstable critical points for almost all initializations. Moreover, for small step sizes and under mild assumptions, the set of OGDA-stable critical points is a superset of GDA-stable critical points, which is a superset of local min-max solutions (strict in some cases). The connecting thread is that the behavior of these dynamics can be studied from a dynamical systems perspective.", "bibtex": "@inproceedings{NEURIPS2018_139c3c1b,\n author = {Daskalakis, Constantinos and Panageas, Ioannis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Limit Points of (Optimistic) Gradient Descent in Min-Max Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/139c3c1b7ca46a9d4fd6d163d98af635-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/139c3c1b7ca46a9d4fd6d163d98af635-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/139c3c1b7ca46a9d4fd6d163d98af635-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/139c3c1b7ca46a9d4fd6d163d98af635-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/139c3c1b7ca46a9d4fd6d163d98af635-Reviews.html", "metareview": "", "pdf_size": 826005, "gs_citation": 321, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1647918767443056764&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "CSAIL, MIT, Cambridge, MA 02138; ISTD, SUTD, Singapore, 487371", "aff_domain": "csail.mit.edu;sutd.edu.sg", "email": "csail.mit.edu;sutd.edu.sg", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/139c3c1b7ca46a9d4fd6d163d98af635-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Singapore University of Technology and Design", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;Innovation, Design and Enterprise", "aff_unique_url": "https://www.csail.mit.edu;https://www.sutd.edu.sg", "aff_unique_abbr": "MIT;SUTD", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Singapore" }, { "title": "The Limits of Post-Selection Generalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11619", "id": "11619", "author_site": "Jonathan Ullman, Adam Smith, Kobbi Nissim, Uri Stemmer, Thomas Steinke", "author": "Jonathan Ullman; Adam Smith; Kobbi Nissim; Uri Stemmer; Thomas Steinke", "abstract": "While statistics and machine learning offers numerous methods for ensuring generalization, these methods often fail in the presence of", "bibtex": "@inproceedings{NEURIPS2018_77ee3bc5,\n author = {Ullman, Jonathan and Smith, Adam and Nissim, Kobbi and Stemmer, Uri and Steinke, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Limits of Post-Selection Generalization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/77ee3bc58ce560b86c2b59363281e914-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/77ee3bc58ce560b86c2b59363281e914-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/77ee3bc58ce560b86c2b59363281e914-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/77ee3bc58ce560b86c2b59363281e914-Reviews.html", "metareview": "", "pdf_size": 351374, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1953905221745905960&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Georgetown University; Boston University; IBM Research \u2013 Almaden; Ben-Gurion University; Northeastern University", "aff_domain": "georgetown.edu;bu.edu;thomas-steinke.net;uri.co.il;ccs.neu.edu", "email": "georgetown.edu;bu.edu;thomas-steinke.net;uri.co.il;ccs.neu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/77ee3bc58ce560b86c2b59363281e914-Abstract.html", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Georgetown University;Boston University;IBM;Ben-Gurion University of the Negev;Northeastern University", "aff_unique_dep": ";;IBM Research;;", "aff_unique_url": "https://www.georgetown.edu;https://www.bu.edu;https://www.ibm.com/research;https://www.bgu.ac.il;https://www.northeastern.edu", "aff_unique_abbr": "GU;BU;IBM;BGU;NEU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Almaden", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Israel" }, { "title": "The Lingering of Gradients: How to Reuse Gradients Over Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11142", "id": "11142", "author_site": "Zeyuan Allen-Zhu, David Simchi-Levi, Xinshang Wang", "author": "Zeyuan Allen-Zhu; David Simchi-Levi; Xinshang Wang", "abstract": "Classically, the time complexity of a first-order method is estimated by its number of gradient computations. In this paper, we study a more refined complexity by taking into account the ``lingering'' of gradients: once a gradient is computed at $x_k$, the additional time to compute gradients at $x_{k+1},x_{k+2},\\dots$ may be reduced.\n\nWe show how this improves the running time of gradient descent and SVRG. For instance, if the ``additional time'' scales linearly with respect to the traveled distance, then the ``convergence rate'' of gradient descent can be improved from $1/T$ to $\\exp(-T^{1/3})$. On the empirical side, we solve a hypothetical revenue management problem on the Yahoo! Front Page Today Module application with 4.6m users to $10^{-6}$ error (or $10^{-12}$ dual error) using 6 passes of the dataset.", "bibtex": "@inproceedings{NEURIPS2018_b4288d9c,\n author = {Allen-Zhu, Zeyuan and Simchi-Levi, David and Wang, Xinshang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Lingering of Gradients: How to Reuse Gradients Over Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b4288d9c0ec0a1841b3b3728321e7088-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b4288d9c0ec0a1841b3b3728321e7088-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b4288d9c0ec0a1841b3b3728321e7088-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b4288d9c0ec0a1841b3b3728321e7088-Reviews.html", "metareview": "", "pdf_size": 1417463, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6440121194853481468&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Microsoft Research AI; MIT; MIT", "aff_domain": "csail.mit.edu;mit.edu;mit.edu", "email": "csail.mit.edu;mit.edu;mit.edu", "github": "", "project": "https://arxiv.org/abs/1901.02871", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b4288d9c0ec0a1841b3b3728321e7088-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Massachusetts Institute of Technology", "aff_unique_dep": "AI;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://web.mit.edu", "aff_unique_abbr": "MSR;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Nearest Neighbor Information Estimator is Adaptively Near Minimax Rate-Optimal", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11320", "id": "11320", "author_site": "Jiantao Jiao, Weihao Gao, Yanjun Han", "author": "Jiantao Jiao; Weihao Gao; Yanjun Han", "abstract": "We analyze the Kozachenko\u2013Leonenko (KL) fixed k-nearest neighbor estimator for the differential entropy. We obtain the first uniform upper bound on its performance for any fixed k over H\\\"{o}lder balls on a torus without assuming any conditions on how close the density could be from zero. Accompanying a recent minimax lower bound over the H\\\"{o}lder ball, we show that the KL estimator for any fixed k is achieving the minimax rates up to logarithmic factors without cognizance of the smoothness parameter s of the H\\\"{o}lder ball for $s \\in (0,2]$ and arbitrary dimension d, rendering it the first estimator that provably satisfies this property.", "bibtex": "@inproceedings{NEURIPS2018_e9fd7c2c,\n author = {Jiao, Jiantao and Gao, Weihao and Han, Yanjun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Nearest Neighbor Information Estimator is Adaptively Near Minimax Rate-Optimal},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e9fd7c2c6623306db59b6aef5c0d5cac-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e9fd7c2c6623306db59b6aef5c0d5cac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e9fd7c2c6623306db59b6aef5c0d5cac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e9fd7c2c6623306db59b6aef5c0d5cac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e9fd7c2c6623306db59b6aef5c0d5cac-Reviews.html", "metareview": "", "pdf_size": 319059, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8070856348284517429&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Electrical Engineering and Computer Sciences, University of California, Berkeley; Department of ECE, Coordinated Science Laboratory, University of Illinois at Urbana-Champaign; Department of Electrical Engineering, Stanford University", "aff_domain": "berkeley.edu;illinois.edu;stanford.edu", "email": "berkeley.edu;illinois.edu;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e9fd7c2c6623306db59b6aef5c0d5cac-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;University of Illinois Urbana-Champaign;Stanford University", "aff_unique_dep": "Department of Electrical Engineering and Computer Sciences;Department of Electrical and Computer Engineering;Department of Electrical Engineering", "aff_unique_url": "https://www.berkeley.edu;https://illinois.edu;https://www.stanford.edu", "aff_unique_abbr": "UC Berkeley;UIUC;Stanford", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Berkeley;Urbana-Champaign;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Pessimistic Limits and Possibilities of Margin-based Losses in Semi-supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11192", "id": "11192", "author_site": "Jesse Krijthe, Marco Loog", "author": "Jesse Krijthe; Marco Loog", "abstract": "Consider a classification problem where we have both labeled and unlabeled data available. We show that for linear classifiers defined by convex margin-based surrogate losses that are decreasing, it is impossible to construct \\emph{any} semi-supervised approach that is able to guarantee an improvement over the supervised classifier measured by this surrogate loss on the labeled and unlabeled data. For convex margin-based loss functions that also increase, we demonstrate safe improvements \\emph{are} possible.", "bibtex": "@inproceedings{NEURIPS2018_b6a1085a,\n author = {Krijthe, Jesse and Loog, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Pessimistic Limits and Possibilities of Margin-based Losses in Semi-supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/b6a1085a27ab7bff7550f8a3bd017df8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/b6a1085a27ab7bff7550f8a3bd017df8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/b6a1085a27ab7bff7550f8a3bd017df8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/b6a1085a27ab7bff7550f8a3bd017df8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/b6a1085a27ab7bff7550f8a3bd017df8-Reviews.html", "metareview": "", "pdf_size": 472236, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5543469160669368863&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Radboud University, The Netherlands; Delft University of Technology, The Netherlands+University of Copenhagen, Denmark", "aff_domain": "gmail.com;tudelft.nl", "email": "gmail.com;tudelft.nl", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/b6a1085a27ab7bff7550f8a3bd017df8-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "Radboud University;Delft University of Technology;University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ru.nl;https://www.tudelft.nl;https://www.ku.dk", "aff_unique_abbr": "RU;TUDelft;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Netherlands;Denmark" }, { "title": "The Physical Systems Behind Optimization Algorithms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11432", "id": "11432", "author_site": "Lin Yang, Raman Arora, Vladimir Braverman, Tuo Zhao", "author": "Lin Yang; Raman Arora; Vladimir braverman; Tuo Zhao", "abstract": "We use differential equations based approaches to provide some {\\it \\textbf{physics}} insights into analyzing the dynamics of popular optimization algorithms in machine learning. In particular, we study gradient descent, proximal gradient descent, coordinate gradient descent, proximal coordinate gradient, and Newton's methods as well as their Nesterov's accelerated variants in a unified framework motivated by a natural connection of optimization algorithms to physical systems. Our analysis is applicable to more general algorithms and optimization problems {\\it \\textbf{beyond}} convexity and strong convexity, e.g. Polyak-\\L ojasiewicz and error bound conditions (possibly nonconvex).", "bibtex": "@inproceedings{NEURIPS2018_4e62e752,\n author = {Yang, Lin and Arora, Raman and braverman, Vladimir and Zhao, Tuo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Physical Systems Behind Optimization Algorithms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e62e752ae53fb6a6eebd0f6146aa702-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e62e752ae53fb6a6eebd0f6146aa702-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e62e752ae53fb6a6eebd0f6146aa702-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e62e752ae53fb6a6eebd0f6146aa702-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e62e752ae53fb6a6eebd0f6146aa702-Reviews.html", "metareview": "", "pdf_size": 520557, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4989785654377725676&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Princeton University; Johns Hopkins University; Johns Hopkins University; Georgia Institute of Technology", "aff_domain": "princeton.edu;cs.jhu.edu;cs.jhu.edu;gatech.edu", "email": "princeton.edu;cs.jhu.edu;cs.jhu.edu;gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e62e752ae53fb6a6eebd0f6146aa702-Abstract.html", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Princeton University;Johns Hopkins University;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.jhu.edu;https://www.gatech.edu", "aff_unique_abbr": "Princeton;JHU;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Price of Fair PCA: One Extra dimension", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12037", "id": "12037", "author_site": "Samira Samadi, Uthaipon Tantipongpipat, Jamie Morgenstern, Mohit Singh, Santosh Vempala", "author": "Samira Samadi; Uthaipon Tantipongpipat; Jamie H Morgenstern; Mohit Singh; Santosh Vempala", "abstract": "We investigate whether the standard dimensionality reduction technique of PCA inadvertently produces data representations with different fidelity for two different populations. We show that on several real-world data sets, PCA has higher reconstruction error on population A than on B (for example, women versus men or lower- versus higher-educated individuals). This can happen even when the data set has a similar number of samples from A and B. This motivates our study of dimensionality reduction techniques which maintain similar fidelity for A and B. We define the notion of Fair PCA and give a polynomial-time algorithm for finding a low dimensional representation of the data which is nearly-optimal with respect to this measure. Finally, we show on real-world data sets that our algorithm can be used to efficiently generate a fair low dimensional representation of the data.", "bibtex": "@inproceedings{NEURIPS2018_cc4af25f,\n author = {Samadi, Samira and Tantipongpipat, Uthaipon and Morgenstern, Jamie H and Singh, Mohit and Vempala, Santosh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Price of Fair PCA: One Extra dimension},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cc4af25fa9d2d5c953496579b75f6f6c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cc4af25fa9d2d5c953496579b75f6f6c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cc4af25fa9d2d5c953496579b75f6f6c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cc4af25fa9d2d5c953496579b75f6f6c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cc4af25fa9d2d5c953496579b75f6f6c-Reviews.html", "metareview": "", "pdf_size": 730544, "gs_citation": 204, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6814300972813312615&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Georgia Tech; Georgia Tech; Georgia Tech; Georgia Tech; Georgia Tech", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gmail.com;cc.gatech.edu", "email": "gatech.edu;gatech.edu;gatech.edu;gmail.com;cc.gatech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cc4af25fa9d2d5c953496579b75f6f6c-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Price of Privacy for Low-rank Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11414", "id": "11414", "author": "Jalaj Upadhyay", "abstract": "In this paper, we study what price one has to pay to release \\emph{differentially private low-rank factorization} of a matrix. We consider various settings that are close to the real world applications of low-rank factorization: (i) the manner in which matrices are updated (row by row or in an arbitrary manner), (ii) whether matrices are distributed or not, and (iii) how the output is produced (once at the end of all updates, also known as \\emph{one-shot algorithms} or continually). Even though these settings are well studied without privacy, surprisingly, there are no private algorithms for these settings (except when a matrix is updated row by row). We present the first set of differentially private algorithms for all these settings.", "bibtex": "@inproceedings{NEURIPS2018_2eace51d,\n author = {Upadhyay, Jalaj},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Price of Privacy for Low-rank Factorization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2eace51d8f796d04991c831a07059758-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2eace51d8f796d04991c831a07059758-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2eace51d8f796d04991c831a07059758-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2eace51d8f796d04991c831a07059758-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2eace51d8f796d04991c831a07059758-Reviews.html", "metareview": "", "pdf_size": 573745, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12786399782046530476&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Johns Hopkins University, Baltimore, MD - 21201, USA.", "aff_domain": "jhu.edu", "email": "jhu.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2eace51d8f796d04991c831a07059758-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "The Sample Complexity of Semi-Supervised Learning with Nonparametric Mixture Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11888", "id": "11888", "author_site": "Chen Dan, Liu Leqi, Bryon Aragam, Pradeep Ravikumar, Eric Xing", "author": "Chen Dan; Liu Leqi; Bryon Aragam; Pradeep K Ravikumar; Eric P Xing", "abstract": "We study the sample complexity of semi-supervised learning (SSL) and introduce new assumptions based on the mismatch between a mixture model learned from unlabeled data and the true mixture model induced by the (unknown) class conditional distributions. Under these assumptions, we establish an $\\Omega(K\\log K)$ labeled sample complexity bound without imposing parametric assumptions, where $K$ is the number of classes. Our results suggest that even in nonparametric settings it is possible to learn a near-optimal classifier using only a few labeled samples. Unlike previous theoretical work which focuses on binary classification, we consider general multiclass classification ($K>2$), which requires solving a difficult permutation learning problem. This permutation defines a classifier whose classification error is controlled by the Wasserstein distance between mixing measures, and we provide finite-sample results characterizing the behaviour of the excess risk of this classifier. Finally, we describe three algorithms for computing these estimators based on a connection to bipartite graph matching, and perform experiments to illustrate the superiority of the MLE over the majority vote estimator.", "bibtex": "@inproceedings{NEURIPS2018_8ba6c657,\n author = {Dan, Chen and Leqi, Liu and Aragam, Bryon and Ravikumar, Pradeep K and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Sample Complexity of Semi-Supervised Learning with Nonparametric Mixture Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8ba6c657b03fc7c8dd4dff8e45defcd2-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8ba6c657b03fc7c8dd4dff8e45defcd2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8ba6c657b03fc7c8dd4dff8e45defcd2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8ba6c657b03fc7c8dd4dff8e45defcd2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8ba6c657b03fc7c8dd4dff8e45defcd2-Reviews.html", "metareview": "", "pdf_size": 428068, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15839214175361527232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University+Petuum Inc.", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8ba6c657b03fc7c8dd4dff8e45defcd2-Abstract.html", "aff_unique_index": "0;0;0;0;0+1", "aff_unique_norm": "Carnegie Mellon University;Petuum Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.petuum.com", "aff_unique_abbr": "CMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "The Sparse Manifold Transform", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11994", "id": "11994", "author_site": "Yubei Chen, Dylan Paiton, Bruno Olshausen", "author": "Yubei Chen; Dylan Paiton; Bruno Olshausen", "abstract": "We present a signal representation framework called the sparse manifold transform that combines key ideas from sparse coding, manifold learning, and slow feature analysis. It turns non-linear transformations in the primary sensory signal space into linear interpolations in a representational embedding space while maintaining approximate invertibility. The sparse manifold transform is an unsupervised and generative framework that explicitly and simultaneously models the sparse discreteness and low-dimensional manifold structure found in natural scenes. When stacked, it also models hierarchical composition. We provide a theoretical description of the transform and demonstrate properties of the learned representation on both synthetic data and natural videos.", "bibtex": "@inproceedings{NEURIPS2018_8e19a39c,\n author = {Chen, Yubei and Paiton, Dylan and Olshausen, Bruno},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Sparse Manifold Transform},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8e19a39c36b8e5e3afd2a3b2692aea96-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8e19a39c36b8e5e3afd2a3b2692aea96-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8e19a39c36b8e5e3afd2a3b2692aea96-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8e19a39c36b8e5e3afd2a3b2692aea96-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8e19a39c36b8e5e3afd2a3b2692aea96-Reviews.html", "metareview": "", "pdf_size": 3199372, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7582117501774460926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Redwood Center for Theoretical Neuroscience + Department of Electrical Engineering and Computer Science + Vision Science Graduate Group; Redwood Center for Theoretical Neuroscience + Vision Science Graduate Group; Redwood Center for Theoretical Neuroscience + Vision Science Graduate Group + Helen Wills Neuroscience Institute & School of Optometry", "aff_domain": "eecs.berkeley.edu; ; ", "email": "eecs.berkeley.edu; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8e19a39c36b8e5e3afd2a3b2692aea96-Abstract.html", "aff_unique_index": "0+1+2;0+2;0+2+0", "aff_unique_norm": "University of California, Berkeley;Massachusetts Institute of Technology;University of California, Santa Barbara", "aff_unique_dep": "Center for Theoretical Neuroscience;Department of Electrical Engineering and Computer Science;Vision Science Graduate Group", "aff_unique_url": "https://www.berkeley.edu;https://web.mit.edu;https://www.ucsb.edu", "aff_unique_abbr": "UC Berkeley;MIT;UCSB", "aff_campus_unique_index": "0+1+2;0+2;0+2+0", "aff_campus_unique": "Berkeley;Cambridge;Santa Barbara", "aff_country_unique_index": "0+0+0;0+0;0+0+0", "aff_country_unique": "United States" }, { "title": "The Spectrum of the Fisher Information Matrix of a Single-Hidden-Layer Neural Network", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11528", "id": "11528", "author_site": "Jeffrey Pennington, Pratik Worah", "author": "Jeffrey Pennington; Pratik Worah", "abstract": "An important factor contributing to the success of deep learning has been the remarkable ability to optimize large neural networks using simple first-order optimization algorithms like stochastic gradient descent. While the efficiency of such methods depends crucially on the local curvature of the loss surface, very little is actually known about how this geometry depends on network architecture and hyperparameters. In this work, we extend a recently-developed framework for studying spectra of nonlinear random matrices to characterize an important measure of curvature, namely the eigenvalues of the Fisher information matrix. We focus on a single-hidden-layer neural network with Gaussian data and weights and provide an exact expression for the spectrum in the limit of infinite width. We find that linear networks suffer worse conditioning than nonlinear networks and that nonlinear networks are generically non-degenerate. 
We also predict and demonstrate empirically that by adjusting the nonlinearity, the spectrum can be tuned so as to improve the efficiency of first-order optimization methods.", "bibtex": "@inproceedings{NEURIPS2018_18bb68e2,\n author = {Pennington, Jeffrey and Worah, Pratik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Spectrum of the Fisher Information Matrix of a Single-Hidden-Layer Neural Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/18bb68e2b38e4a8ce7cf4f6b2625768c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/18bb68e2b38e4a8ce7cf4f6b2625768c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/18bb68e2b38e4a8ce7cf4f6b2625768c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/18bb68e2b38e4a8ce7cf4f6b2625768c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/18bb68e2b38e4a8ce7cf4f6b2625768c-Reviews.html", "metareview": "", "pdf_size": 881770, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10163178132325090964&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Google Brain; Google Research", "aff_domain": "google.com;google.com", "email": "google.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/18bb68e2b38e4a8ce7cf4f6b2625768c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The challenge of realistic music generation: modelling raw audio at scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11766", "id": "11766", "author_site": "Sander Dieleman, Aaron van den Oord, Karen Simonyan", "author": "Sander Dieleman; Aaron van den Oord; Karen Simonyan", "abstract": "Realistic music generation is a challenging task. When building generative models of music that are learnt from data, typically high-level representations such as scores or MIDI are used that abstract away the idiosyncrasies of a particular performance. But these nuances are very important for our perception of musicality and realism, so in this work we embark on modelling music in the raw audio domain. It has been shown that autoregressive models excel at generating raw audio waveforms of speech, but when applied to music, we find them biased towards capturing local signal structure at the expense of modelling long-range correlations. This is problematic because music exhibits structure at many different timescales. In this work, we explore autoregressive discrete autoencoders (ADAs) as a means to enable autoregressive models to capture long-range correlations in waveforms. 
We find that they allow us to unconditionally generate piano music directly in the raw audio domain, which shows stylistic consistency across tens of seconds.", "bibtex": "@inproceedings{NEURIPS2018_3e441eec,\n author = {Dieleman, Sander and van den Oord, Aaron and Simonyan, Karen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The challenge of realistic music generation: modelling raw audio at scale},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3e441eec3456b703a4fe741005f3981f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3e441eec3456b703a4fe741005f3981f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3e441eec3456b703a4fe741005f3981f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3e441eec3456b703a4fe741005f3981f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3e441eec3456b703a4fe741005f3981f-Reviews.html", "metareview": "", "pdf_size": 448096, "gs_citation": 237, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7663463193618233870&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "DeepMind; DeepMind; DeepMind", "aff_domain": "google.com;google.com;google.com", "email": "google.com;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3e441eec3456b703a4fe741005f3981f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "DeepMind", "aff_unique_dep": "", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "The committee machine: Computational to statistical gaps in learning a two-layers neural network", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11326", "id": "11326", "author_site": "Benjamin Aubin, Antoine Maillard, jean barbier, Florent Krzakala, Nicolas Macris, Lenka Zdeborov\u00e1", "author": "Benjamin Aubin; Antoine Maillard; jean barbier; Florent Krzakala; Nicolas Macris; Lenka Zdeborov\u00e1", "abstract": "Heuristic tools from statistical physics have been used in the past to compute the optimal learning and generalization errors in the teacher-student scenario in multi-layer neural networks. In this contribution, we provide a rigorous justification of these approaches for a two-layers neural network model called the committee machine. We also introduce a version of the approximate message passing (AMP) algorithm for the committee machine that allows one to perform optimal learning in polynomial time for a large set of parameters. We find that there are regimes in which a low generalization error is information-theoretically achievable while the AMP algorithm fails to deliver it; strongly suggesting that no efficient algorithm exists for those cases, and unveiling a large computational gap.", "bibtex": "@inproceedings{NEURIPS2018_84f0f204,\n author = {Aubin, Benjamin and Maillard, Antoine and barbier, jean and Krzakala, Florent and Macris, Nicolas and Zdeborov\\'{a}, Lenka},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. 
Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The committee machine: Computational to statistical gaps in learning a two-layers neural network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/84f0f20482cde7e5eacaf7364a643d33-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/84f0f20482cde7e5eacaf7364a643d33-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/84f0f20482cde7e5eacaf7364a643d33-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/84f0f20482cde7e5eacaf7364a643d33-Reviews.html", "metareview": "", "pdf_size": 488223, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4903323524016093175&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 21, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/84f0f20482cde7e5eacaf7364a643d33-Abstract.html" }, { "title": "The emergence of multiple retinal cell types through efficient coding of natural movies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11894", "id": "11894", "author_site": "Samuel Ocko, Jack Lindsey, Surya Ganguli, Stephane Deny", "author": "Samuel Ocko; Jack Lindsey; Surya Ganguli; Stephane Deny", "abstract": "One of the most striking aspects of early visual processing in the retina is the immediate parcellation of visual information into multiple parallel pathways, formed by different retinal ganglion cell types each tiling the entire visual field. Existing theories of efficient coding have been unable to account for the functional advantages of such cell-type diversity in encoding natural scenes. Here we go beyond previous theories to analyze how a simple linear retinal encoding model with different convolutional cell types efficiently encodes naturalistic spatiotemporal movies given a fixed firing rate budget. We find that optimizing the receptive fields and cell densities of two cell types makes them match the properties of the two main cell types in the primate retina, midget and parasol cells, in terms of spatial and temporal sensitivity, cell spacing, and their relative ratio. Moreover, our theory gives a precise account of how the ratio of midget to parasol cells decreases with retinal eccentricity. Also, we train a nonlinear encoding model with a rectifying nonlinearity to efficiently encode naturalistic movies, and again find emergent receptive fields resembling those of midget and parasol cells that are now further subdivided into ON and OFF types. Thus our work provides a theoretical justification, based on the efficient coding of natural movies, for the existence of the four most dominant cell types in the primate retina that together comprise 70% of all ganglion cells.", "bibtex": "@inproceedings{NEURIPS2018_d94fd74d,\n author = {Ocko, Samuel and Lindsey, Jack and Ganguli, Surya and Deny, Stephane},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The emergence of multiple retinal cell types through efficient coding of natural movies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d94fd74dcde1aa553be72c1006578b23-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d94fd74dcde1aa553be72c1006578b23-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d94fd74dcde1aa553be72c1006578b23-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d94fd74dcde1aa553be72c1006578b23-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d94fd74dcde1aa553be72c1006578b23-Reviews.html", "metareview": "", "pdf_size": 2283242, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11361812911973715079&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Applied Physics, Stanford; Department of Applied Physics, Stanford; Google Brain, Mountain View, CA; Department of Applied Physics, Stanford", "aff_domain": "gmail.com; ; ;gmail.com", "email": "gmail.com; ; ;gmail.com", "github": "https://github.com/ganguli-lab/RetinalCellTypes", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d94fd74dcde1aa553be72c1006578b23-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": "Department of Applied Physics;Google Brain", "aff_unique_url": "https://www.stanford.edu;https://brain.google.com", "aff_unique_abbr": "Stanford;Google Brain", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The promises and pitfalls of Stochastic Gradient Langevin Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11791", "id": "11791", "author_site": "Nicolas Brosse, Alain Durmus, Eric Moulines", "author": "Nicolas Brosse; Alain Durmus; Eric Moulines", "abstract": "Stochastic Gradient Langevin Dynamics (SGLD) has emerged as a key MCMC algorithm for Bayesian learning from large scale datasets. While SGLD with decreasing step sizes converges weakly to the posterior distribution, the algorithm is often used with a constant step size in practice and has demonstrated spectacular successes in machine learning tasks. The current practice is to set the step size inversely proportional to N where N is the number of training samples. As N becomes large, we show that the SGLD algorithm has an invariant probability measure which significantly departs from the target posterior and behaves like Stochastic Gradient Descent (SGD). This difference is inherently due to the high variance of the stochastic gradients. Several strategies have been suggested to reduce this effect; among them, SGLD Fixed Point (SGLDFP) uses carefully designed control variates to reduce the variance of the stochastic gradients. We show that SGLDFP gives approximate samples from the posterior distribution, with an accuracy comparable to the Langevin Monte Carlo (LMC) algorithm for a computational cost sublinear in the number of data points. We provide a detailed analysis of the Wasserstein distances between LMC, SGLD, SGLDFP and SGD and explicit expressions of the means and covariance matrices of their invariant distributions. 
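To make the constant-step-size SGLD and SGLDFP updates discussed above concrete, here is a minimal numpy sketch on a toy Bayesian linear model; the quadratic potential, the 1/N step size, and all names are illustrative assumptions, not the authors' code.

```python
import numpy as np

# Toy Bayesian linear model: U(theta) = sum_i U_i(theta), Gaussian prior.
rng = np.random.default_rng(0)
N, d = 1000, 5
X = rng.normal(size=(N, d))
y = X @ rng.normal(size=d) + 0.1 * rng.normal(size=N)

def grad_U_batch(theta, idx):
    # Gradient of the summed per-datum potentials on a mini-batch,
    # each datum carrying a 1/N share of the prior.
    Xb, yb = X[idx], y[idx]
    return Xb.T @ (Xb @ theta - yb) + len(idx) * theta / N

def sgld_step(theta, step, batch):
    # Plain SGLD: rescaled stochastic gradient plus injected noise.
    g = (N / len(batch)) * grad_U_batch(theta, batch)
    return theta - step * g + np.sqrt(2.0 * step) * rng.normal(size=d)

def sgldfp_step(theta, theta_star, step, batch):
    # SGLDFP: control variates centered at a mode estimate theta_star
    # shrink the gradient variance (the full gradient there is ~0).
    g = grad_U_batch(theta, batch) - grad_U_batch(theta_star, batch)
    g *= N / len(batch)
    return theta - step * g + np.sqrt(2.0 * step) * rng.normal(size=d)

theta_star, *_ = np.linalg.lstsq(X, y, rcond=None)  # rough mode estimate
theta = np.zeros(d)
for _ in range(2000):                    # step size ~ 1/N, as in practice
    batch = rng.choice(N, size=32, replace=False)
    theta = sgldfp_step(theta, theta_star, step=1.0 / N, batch=batch)
print("sample minus mode:", np.round(theta - theta_star, 3))
```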
Our findings are supported by limited numerical experiments.", "bibtex": "@inproceedings{NEURIPS2018_335cd1b9,\n author = {Brosse, Nicolas and Durmus, Alain and Moulines, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The promises and pitfalls of Stochastic Gradient Langevin Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/335cd1b90bfa4ee70b39d08a4ae0cf2d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/335cd1b90bfa4ee70b39d08a4ae0cf2d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/335cd1b90bfa4ee70b39d08a4ae0cf2d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/335cd1b90bfa4ee70b39d08a4ae0cf2d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/335cd1b90bfa4ee70b39d08a4ae0cf2d-Reviews.html", "metareview": "", "pdf_size": 445688, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12609921781735757146&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Centre de Math\u00e9matiques Appliqu\u00e9es, UMR 7641, Ecole Polytechnique, Palaiseau, France; Centre de Math\u00e9matiques Appliqu\u00e9es, UMR 7641, Ecole Polytechnique, Palaiseau, France; Ecole Normale Sup\u00e9rieure CMLA, 61 Av. du Pr\u00e9sident Wilson 94235 Cachan Cedex, France", "aff_domain": "polytechnique.edu;polytechnique.edu;cmla.ens-cachan.fr", "email": "polytechnique.edu;polytechnique.edu;cmla.ens-cachan.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/335cd1b90bfa4ee70b39d08a4ae0cf2d-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Ecole Polytechnique;Ecole Normale Sup\u00e9rieure", "aff_unique_dep": "Centre de Math\u00e9matiques Appliqu\u00e9es;CMLA", "aff_unique_url": "https://www.polytechnique.edu;", "aff_unique_abbr": "Polytechnique;ENS", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Palaiseau;Cachan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "The streaming rollout of deep networks - towards fully model-parallel execution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11401", "id": "11401", "author_site": "Volker Fischer, Jan Koehler, Thomas Pfeil", "author": "Volker Fischer; Jan Koehler; Thomas Pfeil", "abstract": "Deep neural networks, and in particular recurrent networks, are promising candidates to control autonomous agents that interact in real-time with the physical world. However, this requires a seamless integration of temporal features into the network\u2019s architecture. For training and inference, recurrent neural networks are usually rolled out over time, and different rollouts exist. Conventionally, during inference, the layers of a network are computed sequentially, resulting in sparse temporal integration of information and long response times. In this study, we present a theoretical framework to describe rollouts, the level of model-parallelization they induce, and demonstrate differences in solving specific tasks. 
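As a toy illustration of the difference between a conventional sequential rollout and the fully model-parallel streaming rollout described above (my own sketch, not the paper's open-source toolbox):

```python
import numpy as np

rng = np.random.default_rng(1)
L, dim, T = 4, 8, 6                      # layers, width, time steps
W = [rng.normal(scale=0.5, size=(dim, dim)) for _ in range(L)]
frames = rng.normal(size=(T, dim))       # one input frame per time step

def sequential_rollout(frames):
    # Each frame is pushed through all layers before the next arrives.
    outs = []
    for x in frames:
        h = x
        for Wl in W:
            h = np.tanh(Wl @ h)
        outs.append(h)
    return np.array(outs)

def streaming_rollout(frames):
    # All layers update simultaneously from the previous step's state,
    # so layer l at time t sees layer l-1's state from time t-1.
    h = np.zeros((L, dim))
    outs = []
    for x in frames:
        prev = h.copy()
        h[0] = np.tanh(W[0] @ x)
        for l in range(1, L):
            h[l] = np.tanh(W[l] @ prev[l - 1])
        outs.append(h[-1].copy())
    return np.array(outs)

# The streaming output at time t reflects the frame from time t - (L - 1):
print(np.allclose(streaming_rollout(frames)[L - 1],
                  sequential_rollout(frames)[0]))
```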
We prove that certain rollouts, even for networks with only skip connections and no recurrent connections, enable earlier and more frequent responses, and show empirically that these early responses have better performance. The streaming rollout maximizes these properties and enables a fully parallel execution of the network, reducing runtime on massively parallel devices. Finally, we provide an open-source toolbox to design, train, evaluate, and interact with streaming rollouts.", "bibtex": "@inproceedings{NEURIPS2018_08f90c1a,\n author = {Fischer, Volker and Koehler, Jan and Pfeil, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The streaming rollout of deep networks - towards fully model-parallel execution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/08f90c1a417155361a5c4b8d297e0d78-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/08f90c1a417155361a5c4b8d297e0d78-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/08f90c1a417155361a5c4b8d297e0d78-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/08f90c1a417155361a5c4b8d297e0d78-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/08f90c1a417155361a5c4b8d297e0d78-Reviews.html", "metareview": "", "pdf_size": 458211, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4918339413298728627&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Bosch Center for Artificial Intelligence, Renningen, Germany; Bosch Center for Artificial Intelligence, Renningen, Germany; Bosch Center for Artificial Intelligence, Renningen, Germany", "aff_domain": "de.bosch.com;de.bosch.com;de.bosch.com", "email": "de.bosch.com;de.bosch.com;de.bosch.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/08f90c1a417155361a5c4b8d297e0d78-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Bosch Center for Artificial Intelligence", "aff_unique_dep": "Artificial Intelligence", "aff_unique_url": "https://www.bosch-ai.com", "aff_unique_abbr": "BCAI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Renningen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "title": "Theoretical Linear Convergence of Unfolded ISTA and Its Practical Weights and Thresholds", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11864", "id": "11864", "author_site": "Xiaohan Chen, Jialin Liu, Zhangyang Wang, Wotao Yin", "author": "Xiaohan Chen; Jialin Liu; Zhangyang Wang; Wotao Yin", "abstract": "In recent years, unfolding iterative algorithms as neural networks has become an empirical success in solving sparse recovery problems. However, its theoretical understanding is still immature, which prevents us from fully utilizing the power of neural networks. In this work, we study unfolded ISTA (Iterative Shrinkage Thresholding Algorithm) for sparse signal recovery. We introduce a weight structure that is necessary for asymptotic convergence to the true sparse signal. With this structure, unfolded ISTA can attain linear convergence, which is better than the sublinear convergence of ISTA/FISTA in general cases. 
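For reference, a minimal numpy sketch of the plain ISTA iteration that LISTA unfolds into layers; in the learned network, the matrices applied to the iterate and the measurements, and the per-layer thresholds, become trainable parameters. This is an assumption-laden illustration, not the authors' released code.

```python
import numpy as np

def soft_threshold(x, theta):
    # The shrinkage nonlinearity used by (L)ISTA layers.
    return np.sign(x) * np.maximum(np.abs(x) - theta, 0.0)

def ista(A, b, lam, n_iter=500):
    # x_{k+1} = soft(x_k + (1/L) A^T (b - A x_k), lam / L)
    L = np.linalg.norm(A, 2) ** 2        # Lipschitz constant of the gradient
    x = np.zeros(A.shape[1])
    for _ in range(n_iter):
        x = soft_threshold(x + A.T @ (b - A @ x) / L, lam / L)
    return x

rng = np.random.default_rng(2)
m, n, k = 50, 100, 5
A = rng.normal(size=(m, n)) / np.sqrt(m)
x_true = np.zeros(n)
x_true[rng.choice(n, k, replace=False)] = rng.normal(size=k)
x_hat = ista(A, A @ x_true, lam=0.01)
print("recovery error:", np.linalg.norm(x_hat - x_true).round(4))
```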
Furthermore, we propose to incorporate thresholding in the network to perform support selection, which is easy to implement and able to boost the convergence rate both theoretically and empirically. Extensive simulations, including sparse vector recovery and a compressive sensing experiment on real image data, corroborate our theoretical results and demonstrate their practical usefulness. We have made our code publicly available: https://github.com/xchen-tamu/linear-lista-cpss.", "bibtex": "@inproceedings{NEURIPS2018_cf8c9be2,\n author = {Chen, Xiaohan and Liu, Jialin and Wang, Zhangyang and Yin, Wotao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Theoretical Linear Convergence of Unfolded ISTA and Its Practical Weights and Thresholds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cf8c9be2a4508a24ae92c9d3d379131d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cf8c9be2a4508a24ae92c9d3d379131d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cf8c9be2a4508a24ae92c9d3d379131d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cf8c9be2a4508a24ae92c9d3d379131d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cf8c9be2a4508a24ae92c9d3d379131d-Reviews.html", "metareview": "", "pdf_size": 662664, "gs_citation": 304, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8395828592719058096&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science and Engineering, Texas A&M University; Department of Mathematics, University of California, Los Angeles; Department of Computer Science and Engineering, Texas A&M University; Department of Mathematics, University of California, Los Angeles", "aff_domain": "tamu.edu;math.ucla.edu;tamu.edu;math.ucla.edu", "email": "tamu.edu;math.ucla.edu;tamu.edu;math.ucla.edu", "github": "https://github.com/xchen-tamu/linear-lista-cpss", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cf8c9be2a4508a24ae92c9d3d379131d-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Texas A&M University;University of California, Los Angeles", "aff_unique_dep": "Department of Computer Science and Engineering;Department of Mathematics", "aff_unique_url": "https://www.tamu.edu;https://www.ucla.edu", "aff_unique_abbr": "TAMU;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Theoretical guarantees for EM under misspecified Gaussian mixture models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11920", "id": "11920", "author_site": "Raaz Dwivedi, nh\u1eadt H\u1ed3, Koulik Khamaru, Martin Wainwright, Michael Jordan", "author": "Raaz Dwivedi; nh\u1eadt H\u1ed3; Koulik Khamaru; Martin J. Wainwright; Michael I Jordan", "abstract": "Recent years have witnessed substantial progress in understanding the behavior of EM for mixture models that are correctly specified. Given that model misspecification is common in practice, it is important to understand EM in this more general setting. 
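A minimal sketch of the flavor of EM analyzed here: EM for a symmetric two-component univariate mixture 0.5 N(-mu, 1) + 0.5 N(mu, 1) fit to data drawn from a deliberately different distribution, so the iterates converge to a projection rather than a true parameter. The data-generating choice below is an illustrative assumption.

```python
import numpy as np

rng = np.random.default_rng(3)
# Misspecified source: fit a symmetric 2-component mixture with unit
# variances to samples from a single Gaussian with a larger variance.
x = rng.normal(loc=1.0, scale=1.5, size=5000)

def em_step(mu):
    # E-step: posterior responsibility of the +mu component,
    # which simplifies to a logistic in 2*mu*x for this model.
    w = 1.0 / (1.0 + np.exp(-2.0 * mu * x))
    # M-step: signed weighted mean, using the +/-mu symmetry.
    return np.mean((2.0 * w - 1.0) * x)

mu = 0.1
for _ in range(100):       # converges geometrically to the projection
    mu = em_step(mu)
print("EM limit (projection) mu* =", round(mu, 3))
```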
We provide non-asymptotic guarantees for population and sample-based EM for parameter estimation under a few specific univariate settings of misspecified Gaussian mixture models. Due to misspecification, the EM iterates no longer converge to the true model and instead converge to the projection of the true model over the set of models being searched over. We provide two classes of theoretical guarantees: first, we characterize the bias introduced due to the misspecification; and second, we prove that population EM converges at a geometric rate to the model projection under a suitable initialization condition. This geometric convergence rate for population EM implies a statistical complexity of order $1/\\sqrt{n}$ when running EM with $n$ samples. We validate our theoretical findings in different cases via several numerical examples.", "bibtex": "@inproceedings{NEURIPS2018_acc21473,\n author = {Dwivedi, Raaz and H\u1ed3, nh\u1eadt and Khamaru, Koulik and Wainwright, Martin J and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Theoretical guarantees for EM under misspecified Gaussian mixture models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/acc21473c4525b922286130ffbfe00b5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/acc21473c4525b922286130ffbfe00b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/acc21473c4525b922286130ffbfe00b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/acc21473c4525b922286130ffbfe00b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/acc21473c4525b922286130ffbfe00b5-Reviews.html", "metareview": "", "pdf_size": 452404, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11237454108923985127&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "UC Berkeley; UC Berkeley; UC Berkeley; UC Berkeley + Voleon Group; UC Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/acc21473c4525b922286130ffbfe00b5-Abstract.html", "aff_unique_index": "0;0;0;0+1;0", "aff_unique_norm": "University of California, Berkeley;Voleon Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "title": "Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12009", "id": "12009", "author_site": "Rui Luo, Jianhong Wang, Yaodong Yang, Jun WANG, Zhanxing Zhu", "author": "Rui Luo; Jianhong Wang; Yaodong Yang; Jun WANG; Zhanxing Zhu", "abstract": "In this paper, we propose a novel sampling method, the thermostat-assisted continuously-tempered Hamiltonian Monte Carlo, for the purpose of multimodal Bayesian learning. 
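For orientation before the details: the sampler extends standard Hamiltonian Monte Carlo, whose bare leapfrog transition looks like the sketch below on a toy double-well potential. The tempering variable and Nosé-Hoover thermostat terms that the paper adds are omitted here, and all names are illustrative.

```python
import numpy as np

rng = np.random.default_rng(4)

def U(q):
    return (q ** 2 - 1.0) ** 2           # double-well: two modes at +/- 1

def grad_U(q):
    return 4.0 * q * (q ** 2 - 1.0)

def hmc_step(q, step=0.1, n_leap=20):
    # One vanilla HMC transition; the thermostat-assisted tempered
    # variant augments (q, p) with a tempering variable and thermostats.
    p = rng.normal()
    q_new, p_new = q, p - 0.5 * step * grad_U(q)
    for _ in range(n_leap):              # leapfrog integration
        q_new = q_new + step * p_new
        p_new = p_new - step * grad_U(q_new)
    p_new = p_new + 0.5 * step * grad_U(q_new)   # trim the last half step
    # Metropolis correction with H(q, p) = U(q) + p^2 / 2.
    dH = (U(q) + 0.5 * p ** 2) - (U(q_new) + 0.5 * p_new ** 2)
    return q_new if np.log(rng.random()) < dH else q

q, hits_left = 1.0, 0
for _ in range(5000):
    q = hmc_step(q)
    hits_left += q < 0
print("fraction of samples in the left mode:", hits_left / 5000)
```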
It simulates a noisy dynamical system by incorporating both a continuously-varying tempering variable and the Nos\\'e-Hoover thermostats. A significant benefit is that it is not only able to efficiently generate i.i.d. samples when the underlying posterior distributions are multimodal, but also capable of adaptively neutralising the noise arising from the use of mini-batches. While the properties of the approach have been studied using synthetic datasets, our experiments on three real datasets have also shown its performance gains over several strong baselines for Bayesian learning with various types of neural networks plugged in.", "bibtex": "@inproceedings{NEURIPS2018_fcf1d8d2,\n author = {Luo, Rui and Wang, Jianhong and Yang, Yaodong and WANG, Jun and Zhu, Zhanxing},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Thermostat-assisted continuously-tempered Hamiltonian Monte Carlo for Bayesian learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Reviews.html", "metareview": "", "pdf_size": 4499380, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1359920802371030920&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University College London; University College London; University College London; Peking University; University College London", "aff_domain": "ucl.ac.uk;cs.ucl.ac.uk;ucl.ac.uk;pku.edu.cn;cs.ucl.ac.uk", "email": "ucl.ac.uk;cs.ucl.ac.uk;ucl.ac.uk;pku.edu.cn;cs.ucl.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fcf1d8d2f36c0cde8eca4b86a8fe1df8-Abstract.html", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University College London;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;http://www.pku.edu.cn", "aff_unique_abbr": "UCL;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United Kingdom;China" }, { "title": "Third-order Smoothness Helps: Faster Stochastic Optimization Algorithms for Finding Local Minima", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11446", "id": "11446", "author_site": "Yaodong Yu, Pan Xu, Quanquan Gu", "author": "Yaodong Yu; Pan Xu; Quanquan Gu", "abstract": "We propose stochastic optimization algorithms that can find local minima faster than existing algorithms for nonconvex optimization problems, by exploiting third-order smoothness to escape non-degenerate saddle points more efficiently. 
More specifically, the proposed algorithm only needs $\\tilde{O}(\\epsilon^{-10/3})$ stochastic gradient evaluations to converge to an approximate local minimum $\\mathbf{x}$, which satisfies $\\|\\nabla f(\\mathbf{x})\\|_2\\leq\\epsilon$ and $\\lambda_{\\min}(\\nabla^2 f(\\mathbf{x}))\\geq -\\sqrt{\\epsilon}$ in unconstrained stochastic optimization, where $\\tilde{O}(\\cdot)$ hides polylogarithmic factors and constants. This improves upon the $\\tilde{O}(\\epsilon^{-7/2})$ gradient complexity achieved by the state-of-the-art stochastic local minima finding algorithms by a factor of $\\tilde{O}(\\epsilon^{-1/6})$. Experiments on two nonconvex optimization problems demonstrate the effectiveness of our algorithm and corroborate our theory.", "bibtex": "@inproceedings{NEURIPS2018_fea9c11c,\n author = {Yu, Yaodong and Xu, Pan and Gu, Quanquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Third-order Smoothness Helps: Faster Stochastic Optimization Algorithms for Finding Local Minima},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/fea9c11c4ad9a395a636ed944a28b51a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/fea9c11c4ad9a395a636ed944a28b51a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/fea9c11c4ad9a395a636ed944a28b51a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/fea9c11c4ad9a395a636ed944a28b51a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/fea9c11c4ad9a395a636ed944a28b51a-Reviews.html", "metareview": "", "pdf_size": 703714, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6731572578813795634&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Virginia; Department of Computer Science, University of California, Los Angeles; Department of Computer Science, University of California, Los Angeles", "aff_domain": "virginia.edu;cs.ucla.edu;cs.ucla.edu", "email": "virginia.edu;cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/fea9c11c4ad9a395a636ed944a28b51a-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Virginia;University of California, Los Angeles", "aff_unique_dep": "Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.virginia.edu;https://www.ucla.edu", "aff_unique_abbr": "UVA;UCLA", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Thwarting Adversarial Examples: An $L_0$-Robust Sparse Fourier Transform", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11955", "id": "11955", "author_site": "Mitali Bafna, Jack Murtagh, Nikhil Vyas", "author": "Mitali Bafna; Jack Murtagh; Nikhil Vyas", "abstract": "We give a new algorithm for approximating the Discrete Fourier transform of an approximately sparse signal that is robust to worst-case $L_0$ corruptions, namely that some coordinates of the signal can be corrupted arbitrarily. 
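The core phenomenon can be sketched with a simple alternating hard-thresholding decomposition (a generic technique chosen here for illustration, not the paper's algorithm): a signal that is sparse in the Fourier domain can be separated from a few arbitrarily large $L_0$ corruptions in the time domain. All constants below are illustrative assumptions.

```python
import numpy as np

rng = np.random.default_rng(5)
n, k, s = 256, 8, 10     # length, Fourier sparsity, number of corruptions

# k-sparse spectrum with well-separated magnitudes...
support = rng.choice(n, k, replace=False)
freq = np.zeros(n, dtype=complex)
freq[support] = n * np.exp(2j * np.pi * rng.random(k))
x = np.fft.ifft(freq)
# ...observed with s arbitrarily placed, large time-domain corruptions.
y = x.copy()
y[rng.choice(n, s, replace=False)] += rng.normal(scale=20.0, size=s)

def top_t(v, t):
    # Keep the t largest-magnitude entries of v, zero out the rest.
    out = np.zeros_like(v)
    idx = np.argsort(np.abs(v))[-t:]
    out[idx] = v[idx]
    return out

def robust_sparse_fft(y, k, s, n_iter=30):
    # Alternate between a k-sparse spectrum estimate and an s-sparse
    # corruption estimate, each refit against the other's residual.
    e = np.zeros_like(y)
    for _ in range(n_iter):
        c = top_t(np.fft.fft(y - e), k)
        e = top_t(y - np.fft.ifft(c), s)
    return c

c_hat = robust_sparse_fft(y, k, s)
err = np.linalg.norm(c_hat - freq) / np.linalg.norm(freq)
print("relative spectrum error:", err.round(4))
```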
Our techniques generalize to a wide range of linear transformations that are used in data analysis such as the Discrete Cosine and Sine transforms, the Hadamard transform, and their high-dimensional analogs. We use our algorithm to successfully defend against worst-case $L_0$ adversaries in the setting of image classification. We give experimental results on the Jacobian-based Saliency Map Attack (JSMA) and the CW $L_0$ attack on the MNIST and Fashion-MNIST datasets as well as the Adversarial Patch on the ImageNet dataset.", "bibtex": "@inproceedings{NEURIPS2018_aef546f2,\n author = {Bafna, Mitali and Murtagh, Jack and Vyas, Nikhil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Thwarting Adversarial Examples: An L\\_0-Robust Sparse Fourier Transform},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aef546f29283b6ccef3c61f58fb8e79b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aef546f29283b6ccef3c61f58fb8e79b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aef546f29283b6ccef3c61f58fb8e79b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aef546f29283b6ccef3c61f58fb8e79b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aef546f29283b6ccef3c61f58fb8e79b-Reviews.html", "metareview": "", "pdf_size": 674966, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4937765559584491989&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "School of Engineering & Applied Sciences, Harvard University; School of Engineering & Applied Sciences, Harvard University; Department of Electrical Engineering and Computer Science, MIT", "aff_domain": "g.harvard.edu;g.harvard.edu;mit.edu", "email": "g.harvard.edu;g.harvard.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aef546f29283b6ccef3c61f58fb8e79b-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Harvard University;Massachusetts Institute of Technology", "aff_unique_dep": "School of Engineering & Applied Sciences;Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.harvard.edu;https://web.mit.edu", "aff_unique_abbr": "Harvard;MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Tight Bounds for Collaborative PAC Learning via Multiplicative Weights", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11360", "id": "11360", "author_site": "Jiecao Chen, Qin Zhang, Yuan Zhou", "author": "Jiecao Chen; Qin Zhang; Yuan Zhou", "abstract": "We study the collaborative PAC learning problem recently proposed in Blum et al.~\\cite{BHPQ17}, in which we have $k$ players and they want to learn a target function collaboratively, such that the learned function approximates the target function well on all players' distributions simultaneously. The quality of the collaborative learning algorithm is measured by the ratio between the sample complexity of the algorithm and that of the learning algorithm for a single distribution (called the overhead). 
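To sketch the multiplicative-weights mechanism named in the title (a schematic of the general technique, not the paper's exact procedure): keep one weight per player, learn on the weighted mixture of player distributions, and multiplicatively boost players on whom the current hypothesis still errs, so a single hypothesis becomes accurate for all $k$ distributions at once. The threshold learner and all constants below are toy assumptions.

```python
import numpy as np

rng = np.random.default_rng(6)
k = 8                                     # players
centers = rng.uniform(-1.0, 1.0, size=k)  # each player's input distribution
def target(x): return (x > 0.25).astype(int)

def sample_player(i, m):
    x = rng.normal(loc=centers[i], scale=0.3, size=m)
    return x, target(x)

def erm_threshold(x, y):
    # Toy learner: pick the threshold with the lowest pooled error.
    cand = np.concatenate(([-np.inf], np.sort(x)))
    errs = [np.mean((x > t).astype(int) != y) for t in cand]
    return cand[int(np.argmin(errs))]

def player_error(i, t, m=2000):
    x, y = sample_player(i, m)
    return np.mean((x > t).astype(int) != y)

w = np.ones(k)                            # multiplicative weights over players
for _ in range(15):
    m_i = np.maximum((200 * w / w.sum()).astype(int), 1)
    xs, ys = zip(*(sample_player(i, m_i[i]) for i in range(k)))
    t = erm_threshold(np.concatenate(xs), np.concatenate(ys))
    errs = np.array([player_error(i, t) for i in range(k)])
    w *= np.exp(5.0 * errs)               # boost poorly served players
print("max per-player error:", errs.max().round(3))
```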
We obtain a collaborative learning algorithm with overhead $O(\\ln k)$, improving the one with overhead $O(\\ln^2 k)$ in \\cite{BHPQ17}. We also show that an $\\Omega(\\ln k)$ overhead is inevitable when $k$ is polynomially bounded by the VC dimension of the hypothesis class. Finally, our experimental study demonstrates the superiority of our algorithm over the one in Blum et al.~\\cite{BHPQ17} on real-world datasets.", "bibtex": "@inproceedings{NEURIPS2018_ed519dac,\n author = {Chen, Jiecao and Zhang, Qin and Zhou, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tight Bounds for Collaborative PAC Learning via Multiplicative Weights},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ed519dacc89b2bead3f453b0b05a4a8b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ed519dacc89b2bead3f453b0b05a4a8b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ed519dacc89b2bead3f453b0b05a4a8b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ed519dacc89b2bead3f453b0b05a4a8b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ed519dacc89b2bead3f453b0b05a4a8b-Reviews.html", "metareview": "", "pdf_size": 504910, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4365423867756896183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Computer Science Department, Indiana University at Bloomington; Computer Science Department, Indiana University at Bloomington; Computer Science Department, Indiana University at Bloomington + Department of Industrial and Enterprise Systems Engineering, University of Illinois at Urbana-Champaign", "aff_domain": "iu.edu;indiana.edu;illinois.edu", "email": "iu.edu;indiana.edu;illinois.edu", "github": "", "project": "https://arxiv.org/abs/1805.09217", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ed519dacc89b2bead3f453b0b05a4a8b-Abstract.html", "aff_unique_index": "0;0;0+1", "aff_unique_norm": "Indiana University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Computer Science Department;Department of Industrial and Enterprise Systems Engineering", "aff_unique_url": "https://www.indiana.edu;https://illinois.edu", "aff_unique_abbr": "IU;UIUC", "aff_campus_unique_index": "0;0;0+1", "aff_campus_unique": "Bloomington;Urbana-Champaign", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "To Trust Or Not To Trust A Classifier", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11541", "id": "11541", "author_site": "Heinrich Jiang, Been Kim, Melody Guan, Maya Gupta", "author": "Heinrich Jiang; Been Kim; Melody Guan; Maya Gupta", "abstract": "Knowing when a classifier's prediction can be trusted is useful in many applications and critical for safely using AI. While the bulk of the effort in machine learning research has been towards improving classifier performance, understanding when a classifier's predictions should and should not be trusted has received far less attention. The standard approach is to use the classifier's discriminant or confidence score; however, we show there exists an alternative that is more effective in many situations. 
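A compact sketch of the alternative score described next: compare the distance from a test point to the training points of the class it was assigned against the distance to the nearest other class. The paper's version additionally filters out low-density training points; the simplified helper below is an assumption, not the released TrustScore code.

```python
import numpy as np

def trust_score(x, pred, train_X, train_y):
    # Distance from x to the nearest training point of each class.
    classes = np.unique(train_y)
    d = {c: np.min(np.linalg.norm(train_X[train_y == c] - x, axis=1))
         for c in classes}
    d_pred = d[pred]
    d_other = min(v for c, v in d.items() if c != pred)
    # > 1 means x sits closer to its predicted class than to any other,
    # i.e. the classifier agrees with a nearest-neighbor rule here.
    return d_other / d_pred

rng = np.random.default_rng(7)
train_X = np.vstack([rng.normal(0, 1, (100, 2)), rng.normal(4, 1, (100, 2))])
train_y = np.array([0] * 100 + [1] * 100)
x = np.array([4.0, 4.0])
print(trust_score(x, 1, train_X, train_y))   # high: trustworthy prediction
print(trust_score(x, 0, train_X, train_y))   # low: suspicious prediction
```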
We propose a new score, called the {\\it trust score}, which measures the agreement between the classifier and a modified nearest-neighbor classifier on the testing example. We show empirically that high (low) trust scores produce surprisingly high precision at identifying correctly (incorrectly) classified examples, consistently outperforming the classifier's confidence score as well as many other baselines. Further, under some mild distributional assumptions, we show that if the trust score for an example is high (low), the classifier will likely agree (disagree) with the Bayes-optimal classifier. Our guarantees consist of non-asymptotic rates of statistical consistency under various nonparametric settings and build on recent developments in topological data analysis.", "bibtex": "@inproceedings{NEURIPS2018_7180cffd,\n author = {Jiang, Heinrich and Kim, Been and Guan, Melody and Gupta, Maya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {To Trust Or Not To Trust A Classifier},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7180cffd6a8e829dacfc2a31b3f72ece-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7180cffd6a8e829dacfc2a31b3f72ece-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7180cffd6a8e829dacfc2a31b3f72ece-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7180cffd6a8e829dacfc2a31b3f72ece-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7180cffd6a8e829dacfc2a31b3f72ece-Reviews.html", "metareview": "", "pdf_size": 1141850, "gs_citation": 596, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9292152849001694574&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Google Research; Google Brain; Stanford University + Google Research; Google Research", "aff_domain": "google.com;google.com;stanford.edu;google.com", "email": "google.com;google.com;stanford.edu;google.com", "github": "https://github.com/google/TrustScore", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7180cffd6a8e829dacfc2a31b3f72ece-Abstract.html", "aff_unique_index": "0;0;1+0;0", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://www.stanford.edu", "aff_unique_abbr": "Google Research;Stanford", "aff_campus_unique_index": "0;0;1+0;0", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Toddler-Inspired Visual Object Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11138", "id": "11138", "author_site": "Sven Bambach, David Crandall, Linda Smith, Chen Yu", "author": "Sven Bambach; David Crandall; Linda Smith; Chen Yu", "abstract": "Real-world learning systems have practical limitations on the quality and quantity of the training datasets that they can collect and consider. How should a system go about choosing a subset of the possible training examples that still allows for learning accurate, generalizable models? To help address this question, we draw inspiration from a highly efficient practical learning system: the human child. 
Using head-mounted cameras, eye gaze trackers, and a model of foveated vision, we collected first-person (egocentric) images that represent a highly accurate approximation of the \"training data\" that toddlers' visual systems collect in everyday, naturalistic learning contexts. We used state-of-the-art computer vision learning models (convolutional neural networks) to help characterize the structure of these data, and found that child data produce significantly better object models than egocentric data experienced by adults in exactly the same environment. By using the CNNs as a modeling tool to investigate the properties of the child data that may enable this rapid learning, we found that child data exhibit a unique combination of quality and diversity, with not only many similar large, high-quality object views but also a greater number and diversity of rare views. This novel methodology of analyzing the visual \"training data\" used by children may not only reveal insights to improve machine learning, but also may suggest new experimental tools to better understand infant learning in developmental psychology.", "bibtex": "@inproceedings{NEURIPS2018_48ab2f9b,\n author = {Bambach, Sven and Crandall, David and Smith, Linda and Yu, Chen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Toddler-Inspired Visual Object Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/48ab2f9b45957ab574cf005eb8a76760-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/48ab2f9b45957ab574cf005eb8a76760-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/48ab2f9b45957ab574cf005eb8a76760-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/48ab2f9b45957ab574cf005eb8a76760-Reviews.html", "metareview": "", "pdf_size": 3737894, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7443065882986613219&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 19, "aff": "School of Informatics, Computing, and Engineering; School of Informatics, Computing, and Engineering; Dept. of Psychological and Brain Sciences; Dept. 
of Psychological and Brain Sciences", "aff_domain": "iu.edu;iu.edu;iu.edu;iu.edu", "email": "iu.edu;iu.edu;iu.edu;iu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/48ab2f9b45957ab574cf005eb8a76760-Abstract.html", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Indiana University;University of California, Santa Barbara", "aff_unique_dep": "School of Informatics, Computing, and Engineering;Department of Psychological and Brain Sciences", "aff_unique_url": "https://soic.indiana.edu;https://www.psych.ucsb.edu", "aff_unique_abbr": ";UCSB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "TopRank: A practical algorithm for online stochastic ranking", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11392", "id": "11392", "author_site": "Tor Lattimore, Branislav Kveton, Shuai Li, Csaba Szepesvari", "author": "Tor Lattimore; Branislav Kveton; Shuai Li; Csaba Szepesvari", "abstract": "Online learning to rank is a sequential decision-making problem where in each round the learning agent chooses a list of items and receives feedback in the form of clicks from the user. Many sample-efficient algorithms have been proposed for this problem that assume a specific click model connecting rankings and user behavior. We propose a generalized click model that encompasses many existing models, including the position-based and cascade models. Our generalization motivates a novel online learning algorithm based on topological sort, which we call TopRank. TopRank is (a) more natural than existing algorithms, (b) has stronger regret guarantees than existing algorithms with comparable generality, (c) has a more insightful proof that leaves the door open to many generalizations, (d) outperforms existing algorithms empirically.", "bibtex": "@inproceedings{NEURIPS2018_de03beff,\n author = {Lattimore, Tor and Kveton, Branislav and Li, Shuai and Szepesvari, Csaba},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {TopRank: A practical algorithm for online stochastic ranking},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/de03beffeed9da5f3639a621bcab5dd4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/de03beffeed9da5f3639a621bcab5dd4-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/de03beffeed9da5f3639a621bcab5dd4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/de03beffeed9da5f3639a621bcab5dd4-Reviews.html", "metareview": "", "pdf_size": 1089837, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10625539131986757063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/de03beffeed9da5f3639a621bcab5dd4-Abstract.html" }, { "title": "Topkapi: Parallel and Fast Sketches for Finding Top-K Frequent Elements", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12030", "id": "12030", "author_site": "Ankush Mandal, He Jiang, Anshumali Shrivastava, Vivek Sarkar", "author": "Ankush Mandal; He Jiang; Anshumali Shrivastava; Vivek Sarkar", "abstract": "Identifying the top-K frequent items is one of the most common and important operations in large data processing systems. As a result, several solutions have been proposed to solve this problem approximately. In this paper, we identify that in modern distributed settings with both multi-node as well as multi-core parallelism, existing algorithms, although theoretically sound, are suboptimal from the performance perspective. In particular, for identifying top-K frequent items, the Count-Min Sketch (CMS) has fantastic update time but lacks the important property of reducibility, which is needed for exploiting available massive data parallelism. On the other hand, the popular Frequent algorithm (FA) leads to reducible summaries, but its update costs are significant. In this paper, we present Topkapi, a fast and parallel algorithm for finding top-K frequent items, which gives the best of both worlds: it is reducible and has an efficient update time similar to CMS. Topkapi possesses strong theoretical guarantees and leads to significant performance gains due to increased parallelism, relative to past work.", "bibtex": "@inproceedings{NEURIPS2018_cc06a615,\n author = {Mandal, Ankush and Jiang, He and Shrivastava, Anshumali and Sarkar, Vivek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Topkapi: Parallel and Fast Sketches for Finding Top-K Frequent Elements},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cc06a6150b92e17dd3076a0f0f9d2af4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cc06a6150b92e17dd3076a0f0f9d2af4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cc06a6150b92e17dd3076a0f0f9d2af4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cc06a6150b92e17dd3076a0f0f9d2af4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cc06a6150b92e17dd3076a0f0f9d2af4-Reviews.html", "metareview": "", "pdf_size": 357917, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17308935081714564523&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "School of Computer Science, Georgia Institute of Technology, Atlanta, GA; Department of Computer Science, Rice University, Houston, TX; Department of Computer Science, Rice University, Houston, TX; School of Computer Science, Georgia Institute of Technology, Atlanta, GA", "aff_domain": "gatech.edu;rice.edu;rice.edu;gatech.edu", "email": "gatech.edu;rice.edu;rice.edu;gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cc06a6150b92e17dd3076a0f0f9d2af4-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Rice University", "aff_unique_dep": "School of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.gatech.edu;https://www.rice.edu", "aff_unique_abbr": "Georgia Tech;Rice", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Atlanta;Houston", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Total stochastic gradient algorithms and applications in reinforcement learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11967", "id": "11967", "author": "Paavo Parmas", "abstract": "Backpropagation and the chain rule of derivatives have been prominent; however, the total derivative rule has not enjoyed the same amount of attention. In this work we show how the total derivative rule leads to an intuitive visual framework for creating gradient estimators on graphical models. In particular, previous \u201cpolicy gradient theorems\u201d are easily derived. We derive new gradient estimators based on density estimation, as well as a likelihood ratio gradient, which \u201cjumps\u201d to an intermediate node, not directly to the objective function. We evaluate our methods on model-based policy gradient algorithms, achieve good performance, and present evidence towards demystifying the success of the popular PILCO algorithm.", "bibtex": "@inproceedings{NEURIPS2018_0d59701b,\n author = {Parmas, Paavo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Total stochastic gradient algorithms and applications in reinforcement learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/0d59701b3474225fca5563e015965886-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/0d59701b3474225fca5563e015965886-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/0d59701b3474225fca5563e015965886-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/0d59701b3474225fca5563e015965886-Reviews.html", "metareview": "", "pdf_size": 433766, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17116715867381158258&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Neural Computation Unit, Okinawa Institute of Science and Technology Graduate University, Okinawa, Japan", "aff_domain": "oist.jp", "email": "oist.jp", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/0d59701b3474225fca5563e015965886-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Okinawa Institute of Science and Technology Graduate University", "aff_unique_dep": "Neural Computation Unit", "aff_unique_url": "https://www.oist.jp", "aff_unique_abbr": "OIST", "aff_campus_unique_index": "0", "aff_campus_unique": "Okinawa", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "title": "Towards Deep Conversational Recommendations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11924", "id": "11924", "author_site": "Raymond Li, Samira Ebrahimi Kahou, Hannes Schulz, Vincent Michalski, Laurent Charlin, Chris Pal", "author": "Raymond Li; Samira Ebrahimi Kahou; Hannes Schulz; Vincent Michalski; Laurent Charlin; Chris Pal", "abstract": "There has been growing interest in using neural networks and deep learning techniques to create dialogue systems. Conversational recommendation is an interesting setting for the scientific exploration of dialogue with natural language as the associated discourse involves goal-driven dialogue that often transforms naturally into more free-form chat. This paper provides two contributions. First, until now there has been no publicly available large-scale data set consisting of real-world dialogues centered around recommendations.\nTo address this issue and to facilitate our exploration here, we have collected ReDial, a data set consisting of over 10,000 conversations centered around the theme of providing movie recommendations. We make this data available to the community for further research. Second, we use this dataset to explore multiple facets of conversational recommendations. In particular we explore new neural architectures, mechanisms and methods suitable for composing conversational recommendation systems. Our dataset allows us to systematically probe model sub-components addressing different parts of the overall problem domain ranging from: sentiment analysis and cold-start recommendation generation to detailed aspects of how natural language is used in this setting in the real world. 
We combine such sub-components into a full-blown dialogue system and examine its behavior.", "bibtex": "@inproceedings{NEURIPS2018_800de15c,\n author = {Li, Raymond and Ebrahimi Kahou, Samira and Schulz, Hannes and Michalski, Vincent and Charlin, Laurent and Pal, Chris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Deep Conversational Recommendations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Reviews.html", "metareview": "", "pdf_size": 1056995, "gs_citation": 484, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4021282136473828499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/800de15c79c8d840f4e78d3af937d4d4-Abstract.html" }, { "title": "Towards Robust Detection of Adversarial Examples", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11451", "id": "11451", "author_site": "Tianyu Pang, Chao Du, Yinpeng Dong, Jun Zhu", "author": "Tianyu Pang; Chao Du; Yinpeng Dong; Jun Zhu", "abstract": "Although the recent progress is substantial, deep learning methods can be vulnerable to the maliciously generated adversarial examples. In this paper, we present a novel training procedure and a thresholding test strategy, towards robust detection of adversarial examples. In training, we propose to minimize the reverse cross-entropy (RCE), which encourages a deep network to learn latent representations that better distinguish adversarial examples from normal ones. In testing, we propose to use a thresholding strategy as the detector to filter out adversarial examples for reliable predictions. Our method is simple to implement using standard algorithms, with little extra training cost compared to the common cross-entropy minimization. We apply our method to defend various attacking methods on the widely used MNIST and CIFAR-10 datasets, and achieve significant improvements on robust predictions under all the threat models in the adversarial setting.", "bibtex": "@inproceedings{NEURIPS2018_e0f7a4d0,\n author = {Pang, Tianyu and Du, Chao and Dong, Yinpeng and Zhu, Jun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Robust Detection of Adversarial Examples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Reviews.html", "metareview": "", "pdf_size": 1027384, "gs_citation": 248, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12795339654045612460&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "Dept. of Comp. Sci. & Tech., State Key Lab for Intell. Tech. & Systems, BNRist Center, THBI Lab, Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., State Key Lab for Intell. Tech. & Systems, BNRist Center, THBI Lab, Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., State Key Lab for Intell. Tech. & Systems, BNRist Center, THBI Lab, Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., State Key Lab for Intell. Tech. & Systems, BNRist Center, THBI Lab, Tsinghua University, Beijing, China", "aff_domain": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "email": "mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mails.tsinghua.edu.cn;mail.tsinghua.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e0f7a4d0ef9b84b83b693bbf3feb8e6e-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Dept. of Comp. Sci. & Tech.", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Towards Robust Interpretability with Self-Explaining Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11746", "id": "11746", "author_site": "David Alvarez-Melis, Tommi Jaakkola", "author": "David Alvarez Melis; Tommi Jaakkola", "abstract": "Most recent work on interpretability of complex machine learning models has focused on estimating a-posteriori explanations for previously trained models around specific predictions. Self-explaining models where interpretability plays a key role already during learning have received much less attention. We propose three desiderata for explanations in general -- explicitness, faithfulness, and stability -- and show that existing methods do not satisfy them. In response, we design self-explaining models in stages, progressively generalizing linear classifiers to complex yet architecturally explicit models. Faithfulness and stability are enforced via regularization specifically tailored to such models. Experimental results across various benchmark datasets show that our framework offers a promising direction for reconciling model complexity and interpretability.", "bibtex": "@inproceedings{NEURIPS2018_3e9f0fc9,\n author = {Alvarez Melis, David and Jaakkola, Tommi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Robust Interpretability with Self-Explaining Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/3e9f0fc9b2f89e043bc6233994dfcf76-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/3e9f0fc9b2f89e043bc6233994dfcf76-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/3e9f0fc9b2f89e043bc6233994dfcf76-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/3e9f0fc9b2f89e043bc6233994dfcf76-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/3e9f0fc9b2f89e043bc6233994dfcf76-Reviews.html", "metareview": "", "pdf_size": 2261958, "gs_citation": 1288, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12647575760438009927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "CSAIL, MIT; CSAIL, MIT", "aff_domain": "mit.edu;csail.mit.edu", "email": "mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/3e9f0fc9b2f89e043bc6233994dfcf76-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Towards Text Generation with Adversarially Learned Neural Outlines", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11726", "id": "11726", "author_site": "Sandeep Subramanian, Sai Rajeswar Mudumba, Alessandro Sordoni, Adam Trischler, Aaron Courville, Chris Pal", "author": "Sandeep Subramanian; Sai Rajeswar Mudumba; Alessandro Sordoni; Adam Trischler; Aaron C. Courville; Chris Pal", "abstract": "Recent progress in deep generative models has been fueled by two paradigms -- autoregressive and adversarial models. We propose a combination of both approaches with the goal of learning generative models of text. Our method first produces a high-level sentence outline and then generates words sequentially, conditioning on both the outline and the previous outputs.\nWe generate outlines with an adversarial model trained to approximate the distribution of sentences in a latent space induced by general-purpose sentence encoders. This provides strong, informative conditioning for the autoregressive stage. Our quantitative evaluations suggest that conditioning information from generated outlines is able to guide the autoregressive model to produce realistic samples, comparable to maximum-likelihood trained language models, even at high temperatures with multinomial sampling. Qualitative results also demonstrate that this generative procedure yields natural-looking sentences and interpolations.", "bibtex": "@inproceedings{NEURIPS2018_aaaccd27,\n author = {Subramanian, Sandeep and Mudumba, Sai Rajeswar and Sordoni, Alessandro and Trischler, Adam and Courville, Aaron C and Pal, Chris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Text Generation with Adversarially Learned Neural Outlines},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/aaaccd2766ec67aecbe26459bb828d81-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/aaaccd2766ec67aecbe26459bb828d81-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/aaaccd2766ec67aecbe26459bb828d81-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/aaaccd2766ec67aecbe26459bb828d81-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/aaaccd2766ec67aecbe26459bb828d81-Reviews.html", "metareview": "", "pdf_size": 396028, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9808478999936203014&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Montr\u00e9al Institute for Learning Algorithms+Universit\u00e9 de Montr\u00e9al+CIFAR Fellow; Montr\u00e9al Institute for Learning Algorithms+Universit\u00e9 de Montr\u00e9al+Element AI; Microsoft Research Montr\u00e9al; Microsoft Research Montr\u00e9al; Montr\u00e9al Institute for Learning Algorithms+Universit\u00e9 de Montr\u00e9al+CIFAR Fellow; \u00c9cole Polytechnique de Montr\u00e9al+Element AI", "aff_domain": "umontreal.ca;umontreal.ca;microsoft.com;microsoft.com;umontreal.ca;polymtl.ca", "email": "umontreal.ca;umontreal.ca;microsoft.com;microsoft.com;umontreal.ca;polymtl.ca", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/aaaccd2766ec67aecbe26459bb828d81-Abstract.html", "aff_unique_index": "0+1+2;0+1+3;4;4;0+1+2;5+3", "aff_unique_norm": "Montr\u00e9al Institute for Learning Algorithms;Universit\u00e9 de Montr\u00e9al;CIFAR;Element AI;Microsoft;Ecole Polytechnique de Montr\u00e9al", "aff_unique_dep": "Learning Algorithms;;;;Research;", "aff_unique_url": "https://mila.quebec;https://www.umontreal.ca;https://www.cifar.ca;https://www.elementai.com;https://www.microsoft.com/en-us/research/group/microsoft-research-montreal;https://www.polymtl.ca", "aff_unique_abbr": "MILA;UdeM;CIFAR;Element AI;MSR Montreal;Polytechnique Montr\u00e9al", "aff_campus_unique_index": ";;1;1;;2", "aff_campus_unique": ";Montreal;Montr\u00e9al", "aff_country_unique_index": "0+0+0;0+0+0;0;0;0+0+0;0+0", "aff_country_unique": "Canada" }, { "title": "Towards Understanding Acceleration Tradeoff between Momentum and Asynchrony in Nonconvex Stochastic Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11368", "id": "11368", "author_site": "Tianyi Liu, Shiyang Li, Jianping Shi, Enlu Zhou, Tuo Zhao", "author": "Tianyi Liu; Shiyang Li; Jianping Shi; Enlu Zhou; Tuo Zhao", "abstract": "Asynchronous momentum stochastic gradient descent algorithms (Async-MSGD) have been widely used in distributed machine learning, e.g., training large collaborative filtering systems and deep neural networks. Due to current technical limits, however, establishing convergence properties of Async-MSGD for these highly complicated nonconvex problems is generally infeasible. Therefore, we propose to analyze the algorithm through a simpler but nontrivial nonconvex problem --- streaming PCA. This allows us to make progress toward understanding Async-MSGD and gaining new insights for more general problems. 
Specifically, by exploiting the diffusion approximation of stochastic optimization, we establish the asymptotic rate of convergence of Async-MSGD for streaming PCA. Our results indicate a fundamental tradeoff between asynchrony and momentum: To ensure convergence and acceleration through asynchrony, we have to reduce the momentum (compared with Sync-MSGD). To the best of our knowledge, this is the first theoretical attempt at understanding Async-MSGD for distributed nonconvex stochastic optimization. Numerical experiments on both streaming PCA and training deep neural networks are provided to support our findings for Async-MSGD.", "bibtex": "@inproceedings{NEURIPS2018_f9d11525,\n author = {Liu, Tianyi and Li, Shiyang and Shi, Jianping and Zhou, Enlu and Zhao, Tuo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Understanding Acceleration Tradeoff between Momentum and Asynchrony in Nonconvex Stochastic Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f9d1152547c0bde01830b7e8bd60024c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f9d1152547c0bde01830b7e8bd60024c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/f9d1152547c0bde01830b7e8bd60024c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f9d1152547c0bde01830b7e8bd60024c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f9d1152547c0bde01830b7e8bd60024c-Reviews.html", "metareview": "", "pdf_size": 1785249, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4698673141571663169&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "School of Industrial and System Engineering, Georgia Institute of Technology; Harbin Institute of Technology; Sensetime Group Limited; School of Industrial and System Engineering, Georgia Institute of Technology; School of Industrial and System Engineering, Georgia Institute of Technology", "aff_domain": "gatech.edu;gmail.com;sensetime.com;isye.gatech.edu;isye.gatech.edu", "email": "gatech.edu;gmail.com;sensetime.com;isye.gatech.edu;isye.gatech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f9d1152547c0bde01830b7e8bd60024c-Abstract.html", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Georgia Institute of Technology;Harbin Institute of Technology;SenseTime Group", "aff_unique_dep": "School of Industrial and System Engineering;;", "aff_unique_url": "https://www.gatech.edu;http://www.hit.edu.cn/;https://www.sensetime.com", "aff_unique_abbr": "Georgia Tech;HIT;SenseTime", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Atlanta;Harbin;", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "title": "Towards Understanding Learning Representations: To What Extent Do Different Neural Networks Learn the Same Representation", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11911", "id": "11911", "author_site": "Liwei Wang, Lunjia Hu, Jiayuan Gu, Zhiqiang Hu, Yue Wu, Kun He, John Hopcroft", "author": "Liwei Wang; Lunjia Hu; Jiayuan Gu; Zhiqiang Hu; Yue Wu; Kun He; John Hopcroft", "abstract": "It is widely believed that learning good representations is one of the 
main reasons for the success of deep neural networks. Although highly intuitive, there is a lack of theory and systematic approaches for quantitatively characterizing what representations deep neural networks learn. In this work, we move a tiny step towards a theory and better understanding of the representations. Specifically, we study a simpler problem: How similar are the representations learned by two networks with identical architecture but trained from different initializations? We develop a rigorous theory based on the neuron activation subspace match model. The theory gives a complete characterization of the structure of neuron activation subspace matches, where the core concepts are maximum match and simple match, which describe the overall and the finest similarity between sets of neurons in two networks, respectively. We also propose efficient algorithms to find the maximum match and simple matches. Finally, we conduct extensive experiments using our algorithms. Experimental results suggest that, surprisingly, representations learned by the same convolutional layers of networks trained from different initializations are not as similar as prevalently expected, at least in terms of subspace match.", "bibtex": "@inproceedings{NEURIPS2018_5fc34ed3,\n author = {Wang, Liwei and Hu, Lunjia and Gu, Jiayuan and Hu, Zhiqiang and Wu, Yue and He, Kun and Hopcroft, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Understanding Learning Representations: To What Extent Do Different Neural Networks Learn the Same Representation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5fc34ed307aac159a30d81181c99847e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5fc34ed307aac159a30d81181c99847e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5fc34ed307aac159a30d81181c99847e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5fc34ed307aac159a30d81181c99847e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5fc34ed307aac159a30d81181c99847e-Reviews.html", "metareview": "", "pdf_size": 376560, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=401428033641216502&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Key Laboratory of Machine Perception, MOE, School of EECS, Peking University + Center for Data Science, Peking University, Beijing Institute of Big Data Research; Computer Science Department, Stanford University; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University; Key Laboratory of Machine Perception, MOE, School of EECS, Peking University; Huazhong University of Science and Technology; Cornell University", "aff_domain": "cis.pku.edu.cn;stanford.edu;pku.edu.cn;pku.edu.cn;pku.edu.cn;hust.edu.cn;cornell.edu", "email": "cis.pku.edu.cn;stanford.edu;pku.edu.cn;pku.edu.cn;pku.edu.cn;hust.edu.cn;cornell.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5fc34ed307aac159a30d81181c99847e-Abstract.html", "aff_unique_index": "0+0;1;0;0;0;2;3", "aff_unique_norm": "Peking University;Stanford University;Huazhong University of Science and 
Technology;Cornell University", "aff_unique_dep": "School of EECS;Computer Science Department;;", "aff_unique_url": "http://www.pku.edu.cn;https://www.stanford.edu;http://www.hust.edu.cn;https://www.cornell.edu", "aff_unique_abbr": "Peking U;Stanford;HUST;Cornell", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Stanford", "aff_country_unique_index": "0+0;1;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "title": "Trading robust representations for sample complexity through self-supervised visual experience", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11914", "id": "11914", "author_site": "Andrea Tacchetti, Stephen Voinea, Georgios Evangelopoulos", "author": "Andrea Tacchetti; Stephen Voinea; Georgios Evangelopoulos", "abstract": "Learning in small sample regimes is among the most remarkable features of the human perceptual system. This ability is related to robustness to transformations, which is acquired through visual experience in the form of weak- or self-supervision during development. We explore the idea of allowing artificial systems to learn representations of visual stimuli through weak supervision prior to downstream supervised tasks. We introduce a novel loss function for representation learning using unlabeled image sets and video sequences, and experimentally demonstrate that these representations support one-shot learning and reduce the sample complexity of multiple recognition tasks. We establish the existence of a trade-off between the sizes of weakly supervised data sets, automatically obtained from video sequences, and fully supervised data sets. Our results suggest that equivalence sets other than class labels, which are abundant in unlabeled visual experience, can be used for self-supervised learning of semantically relevant image embeddings.", "bibtex": "@inproceedings{NEURIPS2018_c3443361,\n author = {Tacchetti, Andrea and Voinea, Stephen and Evangelopoulos, Georgios},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Trading robust representations for sample complexity through self-supervised visual experience},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c344336196d5ec19bd54fd14befdde87-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c344336196d5ec19bd54fd14befdde87-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c344336196d5ec19bd54fd14befdde87-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c344336196d5ec19bd54fd14befdde87-Reviews.html", "metareview": "", "pdf_size": 3513154, "gs_citation": 1, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7873620096499031873&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The Center for Brains, Minds and Machines, MIT + DeepMind; The Center for Brains, Minds and Machines, MIT; The Center for Brains, Minds and Machines, MIT + X, Alphabet", "aff_domain": "mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c344336196d5ec19bd54fd14befdde87-Abstract.html", "aff_unique_index": "0+1;0;0+2", "aff_unique_norm": "Massachusetts Institute of Technology;DeepMind;Alphabet Inc.", "aff_unique_dep": "The Center for Brains, Minds and Machines;;", "aff_unique_url": "https://web.mit.edu/;https://deepmind.com;https://abc.xyz", "aff_unique_abbr": "MIT;DeepMind;Alphabet", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0+1;0;0+0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Training DNNs with Hybrid Block Floating Point", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11069", "id": "11069", "author_site": "Mario Drumond, Tao Lin, Martin Jaggi, Babak Falsafi", "author": "Mario Drumond; Tao LIN; Martin Jaggi; Babak Falsafi", "abstract": "The wide adoption of DNNs has given birth to unrelenting computing requirements, forcing datacenter operators to adopt domain-specific accelerators to train them. These accelerators typically employ densely packed full-precision floating-point arithmetic to maximize performance per area. Ongoing research efforts seek to further increase that performance density by replacing floating-point with fixed-point arithmetic. However, a significant roadblock for these attempts has been fixed point's narrow dynamic range, which is insufficient for DNN training convergence. We identify block floating point (BFP) as a promising alternative representation since it exhibits wide dynamic range and enables the majority of DNN operations to be performed with fixed-point logic. Unfortunately, BFP alone introduces several limitations that preclude its direct applicability. In this work, we introduce HBFP, a hybrid BFP-FP approach, which performs all dot products in BFP and other operations in floating point. HBFP delivers the best of both worlds: the high accuracy of floating point at the superior hardware density of fixed point. 
For a wide variety of models, we show that HBFP matches floating point's accuracy while enabling hardware implementations that deliver up to 8.5x higher throughput.", "bibtex": "@inproceedings{NEURIPS2018_6a9aeddf,\n author = {Drumond, Mario and LIN, Tao and Jaggi, Martin and Falsafi, Babak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training DNNs with Hybrid Block Floating Point},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6a9aeddfc689c1d0e3b9ccc3ab651bc5-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6a9aeddfc689c1d0e3b9ccc3ab651bc5-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6a9aeddfc689c1d0e3b9ccc3ab651bc5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6a9aeddfc689c1d0e3b9ccc3ab651bc5-Reviews.html", "metareview": "", "pdf_size": 480657, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8513210716239323910&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Ecocloud, EPFL; Ecocloud, EPFL; Ecocloud, EPFL; Ecocloud, EPFL", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6a9aeddfc689c1d0e3b9ccc3ab651bc5-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "Ecocloud", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Training Deep Models Faster with Robust, Approximate Importance Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11700", "id": "11700", "author_site": "Tyler Johnson, Carlos Guestrin", "author": "Tyler B Johnson; Carlos Guestrin", "abstract": "In theory, importance sampling speeds up stochastic gradient algorithms for supervised learning by prioritizing training examples. In practice, the cost of computing importances greatly limits the impact of importance sampling. We propose a robust, approximate importance sampling procedure (RAIS) for stochastic gradient descent. By approximating the ideal sampling distribution using robust optimization, RAIS provides much of the benefit of exact importance sampling with drastically reduced overhead. Empirically, we find RAIS-SGD and standard SGD follow similar learning curves, but RAIS moves faster through these paths, achieving speed-ups of at least 20% and sometimes much more.", "bibtex": "@inproceedings{NEURIPS2018_967990de,\n author = {Johnson, Tyler B and Guestrin, Carlos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training Deep Models Faster with Robust, Approximate Importance Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/967990de5b3eac7b87d49a13c6834978-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/967990de5b3eac7b87d49a13c6834978-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/967990de5b3eac7b87d49a13c6834978-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/967990de5b3eac7b87d49a13c6834978-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/967990de5b3eac7b87d49a13c6834978-Reviews.html", "metareview": "", "pdf_size": 3857866, "gs_citation": 159, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6593664310921849554&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "University of Washington, Seattle; University of Washington, Seattle", "aff_domain": "washington.edu;cs.washington.edu", "email": "washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/967990de5b3eac7b87d49a13c6834978-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Training Deep Neural Networks with 8-bit Floating Point Numbers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11737", "id": "11737", "author_site": "Naigang Wang, Jungwook Choi, Daniel Brand, Chia-Yu Chen, Kailash Gopalakrishnan", "author": "Naigang Wang; Jungwook Choi; Daniel Brand; Chia-Yu Chen; Kailash Gopalakrishnan", "abstract": "The state-of-the-art hardware platforms for training deep neural networks are moving from traditional single precision (32-bit) computations towards 16 bits of precision - in large part due to the high energy efficiency and smaller bit storage associated with using reduced-precision representations. However, unlike inference, training with numbers represented with less than 16 bits has been challenging due to the need to maintain fidelity of the gradient computations during back-propagation. Here we demonstrate, for the first time, the successful training of deep neural networks using 8-bit floating point numbers while fully maintaining the accuracy on a spectrum of deep learning models and datasets. In addition to reducing the data and computation precision to 8 bits, we also successfully reduce the arithmetic precision for additions (used in partial product accumulation and weight updates) from 32 bits to 16 bits through the introduction of a number of key ideas including chunk-based accumulation and floating point stochastic rounding. The use of these novel techniques lays the foundation for a new generation of hardware training platforms with the potential for 2-4 times improved throughput over today's systems.", "bibtex": "@inproceedings{NEURIPS2018_335d3d1c,\n author = {Wang, Naigang and Choi, Jungwook and Brand, Daniel and Chen, Chia-Yu and Gopalakrishnan, Kailash},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training Deep Neural Networks with 8-bit Floating Point Numbers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/335d3d1cd7ef05ec77714a215134914c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/335d3d1cd7ef05ec77714a215134914c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/335d3d1cd7ef05ec77714a215134914c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/335d3d1cd7ef05ec77714a215134914c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/335d3d1cd7ef05ec77714a215134914c-Reviews.html", "metareview": "", "pdf_size": 818435, "gs_citation": 672, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17273460269230846713&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/335d3d1cd7ef05ec77714a215134914c-Abstract.html" }, { "title": "Training Neural Networks Using Features Replay", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11643", "id": "11643", "author_site": "Zhouyuan Huo, Bin Gu, Heng Huang", "author": "Zhouyuan Huo; Bin Gu; Heng Huang", "abstract": "Training a neural network using the backpropagation algorithm requires passing error gradients sequentially through the network.\nThe backward locking prevents us from updating network layers in parallel and fully leveraging the computing resources. Recently, several works have tried to decouple and parallelize the backpropagation algorithm. However, all of them suffer from severe accuracy loss or memory explosion when the neural network is deep. To address these challenging issues, we propose a novel parallel-objective formulation for the objective function of the neural network. After that, we introduce the features replay algorithm and prove that it is guaranteed to converge to critical points for the non-convex problem under certain conditions. Finally, we apply our method to training deep convolutional neural networks, and the experimental results show that the proposed method achieves faster convergence, lower memory consumption, and better generalization error than the compared methods.", "bibtex": "@inproceedings{NEURIPS2018_a36b598a,\n author = {Huo, Zhouyuan and Gu, Bin and Huang, Heng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training Neural Networks Using Features Replay},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a36b598abb934e4528412e5a2127b931-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a36b598abb934e4528412e5a2127b931-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a36b598abb934e4528412e5a2127b931-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a36b598abb934e4528412e5a2127b931-Reviews.html", "metareview": "", "pdf_size": 935411, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3898237747030029547&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Electrical and Computer Engineering, University of Pittsburgh+JDDGlobal.com; JDDGlobal.com; Electrical and Computer Engineering, University of Pittsburgh+JDDGlobal.com", "aff_domain": "pitt.edu;gmail.com;pitt.edu", "email": "pitt.edu;gmail.com;pitt.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a36b598abb934e4528412e5a2127b931-Abstract.html", "aff_unique_index": "0+1;1;0+1", "aff_unique_norm": "University of Pittsburgh;JDD Global", "aff_unique_dep": "Electrical and Computer Engineering;", "aff_unique_url": "https://www.pitt.edu;", "aff_unique_abbr": "Pitt;", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Training deep learning based denoisers without ground truth data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11329", "id": "11329", "author_site": "Shakarim Soltanayev, Se Young Chun", "author": "Shakarim Soltanayev; Se Young Chun", "abstract": "Recently developed deep-learning-based denoisers often outperform state-of-the-art conventional denoisers, such as the BM3D. They are typically trained to minimize the mean squared error (MSE) between the output image of a deep neural network and a ground truth image. In deep learning based denoisers, it is important to use high quality noiseless ground truth data for high performance, but it is often challenging or even infeasible to obtain noiseless images in application areas such as hyperspectral remote sensing and medical imaging. In this article, we propose a method based on Stein\u2019s unbiased risk estimator (SURE) for training deep neural network denoisers only based on the use of noisy images. We demonstrate that our SURE-based method, without the use of ground truth data, is able to train deep neural network denoisers to yield performances close to those networks trained with ground truth, and to outperform the state-of-the-art denoiser BM3D. Further improvements were achieved when noisy test images were used for training of denoiser networks using our proposed SURE-based method.", "bibtex": "@inproceedings{NEURIPS2018_c0560792,\n author = {Soltanayev, Shakarim and Chun, Se Young},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training deep learning based denoisers without ground truth data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c0560792e4a3c79e62f76cbf9fb277dd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c0560792e4a3c79e62f76cbf9fb277dd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c0560792e4a3c79e62f76cbf9fb277dd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c0560792e4a3c79e62f76cbf9fb277dd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c0560792e4a3c79e62f76cbf9fb277dd-Reviews.html", "metareview": "", "pdf_size": 1672018, "gs_citation": 162, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10949844547317882495&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Electrical Engineering, Ulsan National Institute of Science and Technology (UNIST), Republic of Korea; Department of Electrical Engineering, Ulsan National Institute of Science and Technology (UNIST), Republic of Korea", "aff_domain": "unist.ac.kr;unist.ac.kr", "email": "unist.ac.kr;unist.ac.kr", "github": "https://github.com/Shakarim94/Net-SURE", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c0560792e4a3c79e62f76cbf9fb277dd-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ulsan", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "title": "Trajectory Convolution for Action Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11231", "id": "11231", "author_site": "Yue Zhao, Yuanjun Xiong, Dahua Lin", "author": "Yue Zhao; Yuanjun Xiong; Dahua Lin", "abstract": "How to leverage the temporal dimension is a key question in video analysis. Recent works suggest an efficient approach to video feature learning, i.e., factorizing 3D convolutions into separate components respectively for spatial and temporal convolutions. The temporal convolution, however, comes with an implicit assumption \u2013 the feature maps across time steps are well aligned so that the features at the same locations can be aggregated. This assumption may be overly strong in practical applications, especially in action recognition where the motion serves as a crucial cue. In this work, we propose a new CNN architecture TrajectoryNet, which incorporates trajectory convolution, a new operation for integrating features along the temporal dimension, to replace the existing temporal convolution. This operation explicitly takes into account the changes in contents caused by deformation or motion, allowing the visual features to be aggregated along the motion paths, trajectories. On two large-scale action recognition datasets, namely, Something-Something and Kinetics, the proposed network architecture achieves notable improvement over strong baselines.", "bibtex": "@inproceedings{NEURIPS2018_8597a6cf,\n author = {Zhao, Yue and Xiong, Yuanjun and Lin, Dahua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Trajectory Convolution for Action Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8597a6cfa74defcbde3047c891d78f90-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8597a6cfa74defcbde3047c891d78f90-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8597a6cfa74defcbde3047c891d78f90-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8597a6cfa74defcbde3047c891d78f90-Reviews.html", "metareview": "", "pdf_size": 933278, "gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4952689779001585153&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Information Engineering, The Chinese University of Hong Kong; Amazon Rekognition; Department of Information Engineering, The Chinese University of Hong Kong", "aff_domain": "ie.cuhk.edu.hk;amazon.com;ie.cuhk.edu.hk", "email": "ie.cuhk.edu.hk;amazon.com;ie.cuhk.edu.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8597a6cfa74defcbde3047c891d78f90-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Amazon", "aff_unique_dep": "Department of Information Engineering;Amazon Rekognition", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.amazon.com/rekognition", "aff_unique_abbr": "CUHK;Amazon Rekognition", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "title": "Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11442", "id": "11442", "author_site": "Ye Jia, Yu Zhang, Ron Weiss, Quan Wang, Jonathan Shen, Fei Ren, zhifeng Chen, Patrick Nguyen, Ruoming Pang, Ignacio Lopez Moreno, Yonghui Wu", "author": "Ye Jia; Yu Zhang; Ron Weiss; Quan Wang; Jonathan Shen; Fei Ren; zhifeng Chen; Patrick Nguyen; Ruoming Pang; Ignacio Lopez Moreno; Yonghui Wu", "abstract": "We describe a neural network-based system for text-to-speech (TTS) synthesis that is able to generate speech audio in the voice of many different speakers, including those unseen during training. Our system consists of three independently trained components: (1) a speaker encoder network, trained on a speaker verification task using an independent dataset of noisy speech from thousands of speakers without transcripts, to generate a fixed-dimensional embedding vector from seconds of reference speech from a target speaker; (2) a sequence-to-sequence synthesis network based on Tacotron 2, which generates a mel spectrogram from text, conditioned on the speaker embedding; (3) an auto-regressive WaveNet-based vocoder that converts the mel spectrogram into a sequence of time domain waveform samples. We demonstrate that the proposed model is able to transfer the knowledge of speaker variability learned by the discriminatively-trained speaker encoder to the new task, and is able to synthesize natural speech from speakers that were not seen during training. We quantify the importance of training the speaker encoder on a large and diverse speaker set in order to obtain the best generalization performance. 
Finally, we show that randomly sampled speaker embeddings can be used to synthesize speech in the voice of novel speakers dissimilar from those used in training, indicating that the model has learned a high quality speaker representation.", "bibtex": "@inproceedings{NEURIPS2018_6832a7b2,\n author = {Jia, Ye and Zhang, Yu and Weiss, Ron and Wang, Quan and Shen, Jonathan and Ren, Fei and Chen, zhifeng and Nguyen, Patrick and Pang, Ruoming and Lopez Moreno, Ignacio and Wu, Yonghui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6832a7b24bc06775d02b7406880b93fc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6832a7b24bc06775d02b7406880b93fc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6832a7b24bc06775d02b7406880b93fc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6832a7b24bc06775d02b7406880b93fc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6832a7b24bc06775d02b7406880b93fc-Reviews.html", "metareview": "", "pdf_size": 666454, "gs_citation": 1086, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14002861277903846410&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.", "aff_domain": "google.com;google.com;google.com; ; ; ; ; ; ; ; ", "email": "google.com;google.com;google.com; ; ; ; ; ; ; ; ", "github": "", "project": "", "author_num": 11, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6832a7b24bc06775d02b7406880b93fc-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Transfer Learning with Neural AutoML", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11799", "id": "11799", "author_site": "Catherine Wong, Neil Houlsby, Yifeng Lu, Andrea Gesmundo", "author": "Catherine Wong; Neil Houlsby; Yifeng Lu; Andrea Gesmundo", "abstract": "We reduce the computational cost of Neural AutoML with transfer learning. AutoML relieves human effort by automating the design of ML algorithms. Neural AutoML has become popular for the design of deep learning architectures; however, this method has a high computation cost. To address this, we propose Transfer Neural AutoML that uses knowledge from prior tasks to speed up network design. 
We extend RL-based architecture search methods to support parallel training on multiple tasks and then transfer the search strategy to new tasks.\nOn language and image classification data, Transfer Neural AutoML reduces convergence time over single-task training by over an order of magnitude on many tasks.", "bibtex": "@inproceedings{NEURIPS2018_bdb3c278,\n author = {Wong, Catherine and Houlsby, Neil and Lu, Yifeng and Gesmundo, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Transfer Learning with Neural AutoML},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bdb3c278f45e6734c35733d24299d3f4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bdb3c278f45e6734c35733d24299d3f4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/bdb3c278f45e6734c35733d24299d3f4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bdb3c278f45e6734c35733d24299d3f4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bdb3c278f45e6734c35733d24299d3f4-Reviews.html", "metareview": "", "pdf_size": 368980, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8288912230427369041&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "MIT; Google Brain; Google Brain; Google Brain", "aff_domain": "mit.edu;google.com;google.com;google.com", "email": "mit.edu;google.com;google.com;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bdb3c278f45e6734c35733d24299d3f4-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://web.mit.edu;https://brain.google.com", "aff_unique_abbr": "MIT;Google Brain", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Transfer of Deep Reactive Policies for MDP Planning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12036", "id": "12036", "author_site": "Aniket (Nick) Bajpai, Sankalp Garg, Mausam", "author": "Aniket (Nick) Bajpai; Sankalp Garg; Mausam", "abstract": "Domain-independent probabilistic planners input an MDP description in a factored representation language such as PPDDL or RDDL, and exploit the specifics of the representation for faster planning. Traditional algorithms operate on each problem instance independently, and good methods for transferring experience from policies of other instances of a domain to a new instance do not exist. Recently, researchers have begun exploring the use of deep reactive policies, trained via deep reinforcement learning (RL), for MDP planning domains. One advantage of deep reactive policies is that they are more amenable to transfer learning.", "bibtex": "@inproceedings{NEURIPS2018_74627b65,\n author = {Bajpai, Aniket (Nick) and Garg, Sankalp and Mausam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Transfer of Deep Reactive Policies for MDP Planning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/74627b65e6e6a4c21e06809b8e02114a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/74627b65e6e6a4c21e06809b8e02114a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/74627b65e6e6a4c21e06809b8e02114a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/74627b65e6e6a4c21e06809b8e02114a-Reviews.html", "metareview": "", "pdf_size": 740543, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4580400729732661142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Indian Institute of Technology, Delhi; Indian Institute of Technology, Delhi; Indian Institute of Technology, Delhi", "aff_domain": "gmail.com;gmail.com;cse.iitd.ac.in", "email": "gmail.com;gmail.com;cse.iitd.ac.in", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/74627b65e6e6a4c21e06809b8e02114a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "Transfer of Value Functions via Variational Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11599", "id": "11599", "author_site": "Andrea Tirinzoni, Rafael Rodriguez Sanchez, Marcello Restelli", "author": "Andrea Tirinzoni; Rafael Rodriguez Sanchez; Marcello Restelli", "abstract": "We consider the problem of transferring value functions in reinforcement learning. We propose an approach that uses the given source tasks to learn a prior distribution over optimal value functions and provide an efficient variational approximation of the corresponding posterior in a new target task. We show our approach to be general, in the sense that it can be combined with complex parametric function approximators and distribution models, while providing two practical algorithms based on Gaussians and Gaussian mixtures. We theoretically analyze them by deriving a finite-sample analysis and provide a comprehensive empirical evaluation in four different domains.", "bibtex": "@inproceedings{NEURIPS2018_9023effe,\n author = {Tirinzoni, Andrea and Rodriguez Sanchez, Rafael and Restelli, Marcello},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Transfer of Value Functions via Variational Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9023effe3c16b0477df9b93e26d57e2c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9023effe3c16b0477df9b93e26d57e2c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9023effe3c16b0477df9b93e26d57e2c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9023effe3c16b0477df9b93e26d57e2c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9023effe3c16b0477df9b93e26d57e2c-Reviews.html", "metareview": "", "pdf_size": 599901, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2943100548672847340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Politecnico di Milano; Politecnico di Milano; Politecnico di Milano", "aff_domain": "polimi.it;polimi.it;polimi.it", "email": "polimi.it;polimi.it;polimi.it", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9023effe3c16b0477df9b93e26d57e2c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Politecnico di Milano", "aff_unique_dep": "", "aff_unique_url": "https://www.polimi.it", "aff_unique_abbr": "Polimi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Italy" }, { "title": "Tree-to-tree Neural Networks for Program Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11263", "id": "11263", "author_site": "Xinyun Chen, Chang Liu, Dawn Song", "author": "Xinyun Chen; Chang Liu; Dawn Song", "abstract": "Program translation is an important tool to migrate legacy code in one language into an ecosystem built in a different language. In this work, we are the first to employ deep neural networks toward tackling this problem. We observe that program translation is a modular procedure, in which a sub-tree of the source tree is translated into the corresponding target sub-tree at each step. To capture this intuition, we design a tree-to-tree neural network to translate a source tree into a target one. Meanwhile, we develop an attention mechanism for the tree-to-tree model, so that when the decoder expands one non-terminal in the target tree, the attention mechanism locates the corresponding sub-tree in the source tree to guide the expansion of the decoder. We evaluate the program translation capability of our tree-to-tree model against several state-of-the-art approaches. Compared against other neural translation models, we observe that our approach is consistently better than the baselines with a margin of up to 15 points. Further, our approach can improve the previous state-of-the-art program translation approaches by a margin of 20 points on the translation of real-world projects.", "bibtex": "@inproceedings{NEURIPS2018_d759175d,\n author = {Chen, Xinyun and Liu, Chang and Song, Dawn},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tree-to-tree Neural Networks for Program Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d759175de8ea5b1d9a2660e45554894f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d759175de8ea5b1d9a2660e45554894f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d759175de8ea5b1d9a2660e45554894f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d759175de8ea5b1d9a2660e45554894f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d759175de8ea5b1d9a2660e45554894f-Reviews.html", "metareview": "", "pdf_size": 393719, "gs_citation": 344, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5077386959127255183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "UC Berkeley; UC Berkeley; UC Berkeley", "aff_domain": "berkeley.edu;gmail.com;cs.berkeley.edu", "email": "berkeley.edu;gmail.com;cs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d759175de8ea5b1d9a2660e45554894f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Turbo Learning for CaptionBot and DrawingBot", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11624", "id": "11624", "author_site": "Qiuyuan Huang, Pengchuan Zhang, Dapeng Wu, Lei Zhang", "author": "Qiuyuan Huang; Pengchuan Zhang; Dapeng Wu; Lei Zhang", "abstract": "We study in this paper the problems of both image captioning and text-to-image generation, and present a novel turbo learning approach to jointly training an image-to-text generator (a.k.a. CaptionBot) and a text-to-image generator (a.k.a. DrawingBot). The key idea behind the joint training is that image-to-text generation and text-to-image generation as dual problems can form a closed loop to provide informative feedback to each other. Based on such feedback, we introduce a new loss metric by comparing the original input with the output produced by the closed loop. In addition to the old loss metrics used in CaptionBot and DrawingBot, this extra loss metric makes the jointly trained CaptionBot and DrawingBot better than the separately trained CaptionBot and DrawingBot. Furthermore, the turbo-learning approach enables semi-supervised learning since the closed loop can provide pseudo-labels for unlabeled samples. Experimental results on the COCO dataset demonstrate that the proposed turbo learning can significantly improve the performance of both CaptionBot and DrawingBot by a large margin.", "bibtex": "@inproceedings{NEURIPS2018_976abf49,\n author = {Huang, Qiuyuan and Zhang, Pengchuan and Wu, Dapeng and Zhang, Lei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Turbo Learning for CaptionBot and DrawingBot},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/976abf49974d4686f87192efa0513ae0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/976abf49974d4686f87192efa0513ae0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/976abf49974d4686f87192efa0513ae0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/976abf49974d4686f87192efa0513ae0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/976abf49974d4686f87192efa0513ae0-Reviews.html", "metareview": "", "pdf_size": 3485700, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6336046719883176050&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Microsoft Research, Redmond, WA, USA; Microsoft Research, Redmond, WA, USA; University of Florida, Gainesville, FL, USA; Microsoft Research, Redmond, WA, USA", "aff_domain": "microsoft.com;microsoft.com;ieee.org;microsoft.com", "email": "microsoft.com;microsoft.com;ieee.org;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/976abf49974d4686f87192efa0513ae0-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Microsoft;University of Florida", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.ufl.edu", "aff_unique_abbr": "MSR;UF", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Redmond;Gainesville", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Uncertainty Sampling is Preconditioned Stochastic Gradient Descent on Zero-One Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11670", "id": "11670", "author_site": "Stephen Mussmann, Percy Liang", "author": "Stephen Mussmann; Percy Liang", "abstract": "Uncertainty sampling, a popular active learning algorithm, is used to reduce the amount of data required to learn a classifier, but it has been observed in practice to converge to different parameters depending on the initialization and sometimes to even better parameters than standard training on all the data. In this work, we give a theoretical explanation of this phenomenon, showing that uncertainty sampling on a convex (e.g., logistic) loss can be interpreted as performing a preconditioned stochastic gradient step on the population zero-one loss. Experiments on synthetic and real datasets support this connection.", "bibtex": "@inproceedings{NEURIPS2018_5abdf8b8,\n author = {Mussmann, Stephen and Liang, Percy S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Uncertainty Sampling is Preconditioned Stochastic Gradient Descent on Zero-One Loss},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/5abdf8b8520b71f3a528c7547ee92428-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/5abdf8b8520b71f3a528c7547ee92428-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/5abdf8b8520b71f3a528c7547ee92428-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/5abdf8b8520b71f3a528c7547ee92428-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/5abdf8b8520b71f3a528c7547ee92428-Reviews.html", "metareview": "", "pdf_size": 714650, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8894347958018096550&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, Stanford University, Stanford, CA; Department of Computer Science, Stanford University, Stanford, CA", "aff_domain": "stanford.edu;cs.stanford.edu", "email": "stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/5abdf8b8520b71f3a528c7547ee92428-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Uncertainty-Aware Attention for Reliable Interpretation and Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11112", "id": "11112", "author_site": "Jay Heo, Hae Beom Lee, Saehoon Kim, Juho Lee, Kwang Joon Kim, Eunho Yang, Sung Ju Hwang", "author": "Jay Heo; Hae Beom Lee; Saehoon Kim; Juho Lee; Kwang Joon Kim; Eunho Yang; Sung Ju Hwang", "abstract": "Attention mechanism is effective in both focusing the deep learning models on relevant features and interpreting them. However, attentions may be unreliable since the networks that generate them are often trained in a weakly-supervised manner. To overcome this limitation, we introduce the notion of input-dependent uncertainty to the attention mechanism, such that it generates attention for each feature with varying degrees of noise based on the given input, to learn larger variance on instances it is uncertain about. We learn this Uncertainty-aware Attention (UA) mechanism using variational inference, and validate it on various risk prediction tasks from electronic health records on which our model significantly outperforms existing attention models. The analysis of the learned attentions shows that our model generates attentions that comply with clinicians' interpretation, and provide richer interpretation via learned variance. Further evaluation of both the accuracy of the uncertainty calibration and the prediction performance with an \"I don't know\" decision shows that UA yields networks with high reliability as well.", "bibtex": "@inproceedings{NEURIPS2018_285e19f2,\n author = {Heo, Jay and Lee, Hae Beom and Kim, Saehoon and Lee, Juho and Kim, Kwang Joon and Yang, Eunho and Hwang, Sung Ju},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Uncertainty-Aware Attention for Reliable Interpretation and Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/285e19f20beded7d215102b49d5c09a0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/285e19f20beded7d215102b49d5c09a0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/285e19f20beded7d215102b49d5c09a0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/285e19f20beded7d215102b49d5c09a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/285e19f20beded7d215102b49d5c09a0-Reviews.html", "metareview": "", "pdf_size": 573523, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9559997504040390908&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "KAIST+AItrics+UNIST; KAIST+AItrics; AItrics; AItrics+University of Oxford; Yonsei University College of Medicine; KAIST+AItrics; KAIST+AItrics", "aff_domain": "kaist.ac.kr;kaist.ac.kr;aitrics.com;stats.ox.ac.uk;yuhs.ac;kaist.ac.kr;kaist.ac.kr", "email": "kaist.ac.kr;kaist.ac.kr;aitrics.com;stats.ox.ac.uk;yuhs.ac;kaist.ac.kr;kaist.ac.kr", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/285e19f20beded7d215102b49d5c09a0-Abstract.html", "aff_unique_index": "0+1+2;0+1;1;1+3;4;0+1;0+1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;AITRICS;Ulsan National Institute of Science and Technology;University of Oxford;Yonsei University", "aff_unique_dep": ";;;;College of Medicine", "aff_unique_url": "https://www.kaist.ac.kr;;https://www.unist.ac.kr;https://www.ox.ac.uk;https://www.yonsei.ac.kr", "aff_unique_abbr": "KAIST;;UNIST;Oxford;Yonsei", "aff_campus_unique_index": ";;;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0;2;0;0;0", "aff_country_unique": "South Korea;;United Kingdom" }, { "title": "Understanding Batch Normalization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11739", "id": "11739", "author_site": "Johan Bjorck, Carla Gomes, Bart Selman, Kilian Weinberger", "author": "Nils Bjorck; Carla P. Gomes; Bart Selman; Kilian Q. Weinberger", "abstract": "Batch normalization (BN) is a technique to normalize activations in intermediate layers of deep neural networks. Its tendency to improve accuracy and speed up training has established BN as a favorite technique in deep learning. Yet, despite its enormous success, there remains little consensus on the exact reason and mechanism behind these improvements. In this paper we take a step towards a better understanding of BN, following an empirical approach. We conduct several experiments, and show that BN primarily enables training with larger learning rates, which is the cause for faster convergence and better generalization. For networks without BN we demonstrate how large gradient updates can result in diverging loss and activations growing uncontrollably with network depth, which limits possible learning rates. BN avoids this problem by constantly correcting activations to be zero-mean and of unit standard deviation, which enables larger gradient steps, yields faster convergence and may help bypass sharp local minima. We further show various ways in which gradients and activations of deep unnormalized networks are ill-behaved. 
We contrast our results against recent findings in random matrix theory, shedding new light on classical initialization schemes and their consequences.", "bibtex": "@inproceedings{NEURIPS2018_36072923,\n author = {Bjorck, Nils and Gomes, Carla P and Selman, Bart and Weinberger, Kilian Q},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding Batch Normalization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/36072923bfc3cf47745d704feb489480-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/36072923bfc3cf47745d704feb489480-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/36072923bfc3cf47745d704feb489480-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/36072923bfc3cf47745d704feb489480-Reviews.html", "metareview": "", "pdf_size": 2101790, "gs_citation": 1018, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7242620323839134535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Cornell University; Cornell University; Cornell University; Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/36072923bfc3cf47745d704feb489480-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Understanding Regularized Spectral Clustering via Graph Conductance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12005", "id": "12005", "author_site": "Yilin Zhang, Karl Rohe", "author": "Yilin Zhang; Karl Rohe", "abstract": "This paper uses the relationship between graph conductance and spectral clustering to study (i) the failures of spectral clustering and (ii) the benefits of regularization. The explanation is simple. Sparse and stochastic graphs create several", "bibtex": "@inproceedings{NEURIPS2018_2a845d4d,\n author = {Zhang, Yilin and Rohe, Karl},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding Regularized Spectral Clustering via Graph Conductance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2a845d4d23b883acb632fefd814e175f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2a845d4d23b883acb632fefd814e175f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/2a845d4d23b883acb632fefd814e175f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2a845d4d23b883acb632fefd814e175f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2a845d4d23b883acb632fefd814e175f-Reviews.html", "metareview": "", "pdf_size": 538000, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10391085645857666347&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Statistics, University of Wisconsin-Madison; Department of Statistics, University of Wisconsin-Madison", "aff_domain": "wisc.edu;wisc.edu", "email": "wisc.edu;wisc.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2a845d4d23b883acb632fefd814e175f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Wisconsin-Madison", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.wisc.edu", "aff_unique_abbr": "UW-Madison", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Understanding Weight Normalized Deep Neural Networks with Rectified Linear Units", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11040", "id": "11040", "author_site": "Yixi Xu, Xiao Wang", "author": "Yixi Xu; Xiao Wang", "abstract": "This paper presents a general framework for norm-based capacity control for $L_{p,q}$ weight normalized deep neural networks. We establish the upper bound on the Rademacher complexities of this family. With an $L_{p,q}$ normalization where $q\\le p^*$ and $1/p+1/p^{*}=1$, we discuss properties of a width-independent capacity control, which only depends on the depth by a square root term. We further analyze the approximation properties of $L_{p,q}$ weight normalized deep neural networks. In particular, for an $L_{1,\\infty}$ weight normalized network, the approximation error can be controlled by the $L_1$ norm of the output layer, and the corresponding generalization error only depends on the architecture by the square root of the depth.", "bibtex": "@inproceedings{NEURIPS2018_812b4ba2,\n author = {Xu, Yixi and Wang, Xiao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding Weight Normalized Deep Neural Networks with Rectified Linear Units},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Reviews.html", "metareview": "", "pdf_size": 343025, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18055799987215605116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Statistics, Purdue University; Department of Statistics, Purdue University", "aff_domain": "purdue.edu;purdue.edu", "email": "purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/812b4ba287f5ee0bc9d43bbf5bbe87fb-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Understanding the Role of Adaptivity in Machine Teaching: The Case of Version Space Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11163", "id": "11163", "author_site": "Yuxin Chen, Adish Singla, Oisin Mac Aodha, Pietro Perona, Yisong Yue", "author": "Yuxin Chen; Adish Singla; Oisin Mac Aodha; Pietro Perona; Yisong Yue", "abstract": "In real-world applications of education, an effective teacher adaptively chooses the next example to teach based on the learner\u2019s current state. However, most existing work in algorithmic machine teaching focuses on the batch setting, where adaptivity plays no role. In this paper, we study the case of teaching consistent, version space learners in an interactive setting. At any time step, the teacher provides an example, the learner performs an update, and the teacher observes the learner\u2019s new state. We highlight that adaptivity does not speed up the teaching process when considering existing models of version space learners, such as the \u201cworst-case\u201d model (the learner picks the next hypothesis randomly from the version space) and the \u201cpreference-based\u201d model (the learner picks hypothesis according to some global preference). Inspired by human teaching, we propose a new model where the learner picks hypotheses according to some local preference defined by the current hypothesis. We show that our model exhibits several desirable properties, e.g., adaptivity plays a key role, and the learner\u2019s transitions over hypotheses are smooth/interpretable. We develop adaptive teaching algorithms, and demonstrate our results via simulation and user studies.", "bibtex": "@inproceedings{NEURIPS2018_bf62768c,\n author = {Chen, Yuxin and Singla, Adish and Mac Aodha, Oisin and Perona, Pietro and Yue, Yisong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding the Role of Adaptivity in Machine Teaching: The Case of Version Space Learners},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bf62768ca46b6c3b5bea9515d1a1fc45-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bf62768ca46b6c3b5bea9515d1a1fc45-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bf62768ca46b6c3b5bea9515d1a1fc45-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bf62768ca46b6c3b5bea9515d1a1fc45-Reviews.html", "metareview": "", "pdf_size": 433887, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4589156983003057946&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "Caltech; MPI-SWS+Caltech; Caltech; Caltech; Caltech", "aff_domain": "caltech.edu;mpi-sws.org;caltech.edu;caltech.edu;caltech.edu", "email": "caltech.edu;mpi-sws.org;caltech.edu;caltech.edu;caltech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bf62768ca46b6c3b5bea9515d1a1fc45-Abstract.html", "aff_unique_index": "0;1+0;0;0;0", "aff_unique_norm": "California Institute of Technology;Max Planck Institute for Software Systems", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;https://www.mpi-sws.org", "aff_unique_abbr": "Caltech;MPI-SWS", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;1+0;0;0;0", "aff_country_unique": "United States;Germany" }, { "title": "Uniform Convergence of Gradients for Non-Convex Learning and Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11835", "id": "11835", "author_site": "Dylan Foster, Ayush Sekhari, Karthik Sridharan", "author": "Dylan J Foster; Ayush Sekhari; Karthik Sridharan", "abstract": "We investigate 1) the rate at which refined properties of the empirical risk---in particular, gradients---converge to their population counterparts in standard non-convex learning tasks, and 2) the consequences of this convergence for optimization. Our analysis follows the tradition of norm-based capacity control. We propose vector-valued Rademacher complexities as a simple, composable, and user-friendly tool to derive dimension-free uniform convergence bounds for gradients in non-convex learning problems. As an application of our techniques, we give a new analysis of batch gradient descent methods for non-convex generalized linear models and non-convex robust regression, showing how to use any algorithm that finds approximate stationary points to obtain optimal sample complexity, even when dimension is high or possibly infinite and multiple passes over the dataset are allowed.", "bibtex": "@inproceedings{NEURIPS2018_59ab3ba9,\n author = {Foster, Dylan J and Sekhari, Ayush and Sridharan, Karthik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Uniform Convergence of Gradients for Non-Convex Learning and Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Reviews.html", "metareview": "", "pdf_size": 1230937, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5012790294617401349&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Cornell University; Cornell University; Cornell University", "aff_domain": "cornell.edu;cs.cornell.edu;cs.cornell.edu", "email": "cornell.edu;cs.cornell.edu;cs.cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/59ab3ba90ae4b4ab84fe69de7b8e3f5f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Universal Growth in Production Economies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11209", "id": "11209", "author_site": "Simina Branzei, Ruta Mehta, Noam Nisan", "author": "Simina Branzei; Ruta Mehta; Noam Nisan", "abstract": "We study a simple variant of the von Neumann model of an expanding economy, in which multiple producers make goods according to their production function. The players trade their goods at the market and then use the bundles received as inputs for the production in the next round. The decision that players have to make is how to invest their money (i.e. bids) in each round.", "bibtex": "@inproceedings{NEURIPS2018_692f93be,\n author = {Branzei, Simina and Mehta, Ruta and Nisan, Noam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Universal Growth in Production Economies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/692f93be8c7a41525c0baf2076aecfb4-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/692f93be8c7a41525c0baf2076aecfb4-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/692f93be8c7a41525c0baf2076aecfb4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/692f93be8c7a41525c0baf2076aecfb4-Reviews.html", "metareview": "", "pdf_size": 53123, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8005858633910459303&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Purdue University; University of Illinois, Urbana-Champaign; Hebrew University and Microsoft Research", "aff_domain": "purdue.edu;illinois.edu;cs.huji.ac.il", "email": "purdue.edu;illinois.edu;cs.huji.ac.il", "github": "", "project": "https://arxiv.org/abs/1802.07385", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/692f93be8c7a41525c0baf2076aecfb4-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Purdue University;University of Illinois;Hebrew University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.purdue.edu;https://illinois.edu;http://www.huji.ac.il", "aff_unique_abbr": "Purdue;UIUC;HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Unorganized Malicious Attacks Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11672", "id": "11672", "author_site": "Ming Pang, Wei Gao, Min Tao, Zhi-Hua Zhou", "author": "Ming Pang; Wei Gao; Min Tao; Zhi-Hua Zhou", "abstract": "Recommender systems have attracted much attention during the past decade. Many attack detection algorithms have been developed for better recommendations, mostly focusing on shilling attacks, where an attack organizer produces a large number of user profiles by the same strategy to promote or demote an item. This work considers a different attack style: unorganized malicious attacks, where attackers individually utilize a small number of user profiles to attack different items without an organizer. This attack style occurs in many real applications, yet it has received little study. We formulate unorganized malicious attacks detection as a matrix completion problem, and propose the Unorganized Malicious Attacks detection (UMA) algorithm, based on the alternating splitting augmented Lagrangian method. We verify, both theoretically and empirically, the effectiveness of the proposed approach.", "bibtex": "@inproceedings{NEURIPS2018_322f6246,\n author = {Pang, Ming and Gao, Wei and Tao, Min and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unorganized Malicious Attacks Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/322f62469c5e3c7dc3e58f5a4d1ea399-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/322f62469c5e3c7dc3e58f5a4d1ea399-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/322f62469c5e3c7dc3e58f5a4d1ea399-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/322f62469c5e3c7dc3e58f5a4d1ea399-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/322f62469c5e3c7dc3e58f5a4d1ea399-Reviews.html", "metareview": "", "pdf_size": 378662, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=544755978865820611&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn;nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn;nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/322f62469c5e3c7dc3e58f5a4d1ea399-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology", "aff_unique_url": "http://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Nanjing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Unsupervised Adversarial Invariance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11498", "id": "11498", "author_site": "Ayush Jaiswal, Rex Yue Wu, Wael Abd-Almageed, Prem Natarajan", "author": "Ayush Jaiswal; Rex Yue Wu; Wael Abd-Almageed; Prem Natarajan", "abstract": "Data representations that contain all the information about target variables but are invariant to nuisance factors benefit supervised learning algorithms by preventing them from learning associations between these factors and the targets, thus reducing overfitting. We present a novel unsupervised invariance induction framework for neural networks that learns a split representation of data through competitive training between the prediction task and a reconstruction task coupled with disentanglement, without needing any labeled information about nuisance factors or domain knowledge. We describe an adversarial instantiation of this framework and provide analysis of its working. Our unsupervised model outperforms state-of-the-art methods, which are supervised, at inducing invariance to inherent nuisance factors, effectively using synthetic data augmentation to learn invariance, and domain adaptation. 
Our method can be applied to any prediction task, e.g., binary/multi-class classification or regression, without loss of generality.", "bibtex": "@inproceedings{NEURIPS2018_03e7ef47,\n author = {Jaiswal, Ayush and Wu, Rex Yue and Abd-Almageed, Wael and Natarajan, Prem},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Adversarial Invariance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/03e7ef47cee6fa4ae7567394b99912b7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/03e7ef47cee6fa4ae7567394b99912b7-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/03e7ef47cee6fa4ae7567394b99912b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/03e7ef47cee6fa4ae7567394b99912b7-Reviews.html", "metareview": "", "pdf_size": 4943509, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14906538554109276732&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "USC Information Sciences Institute; USC Information Sciences Institute; USC Information Sciences Institute; USC Information Sciences Institute", "aff_domain": "isi.edu;isi.edu;isi.edu;isi.edu", "email": "isi.edu;isi.edu;isi.edu;isi.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/03e7ef47cee6fa4ae7567394b99912b7-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "Information Sciences Institute", "aff_unique_url": "https://isi.usc.edu", "aff_unique_abbr": "USC ISI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Attention-guided Image-to-Image Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11369", "id": "11369", "author_site": "Youssef Alami Mejjati, Christian Richardt, James Tompkin, Darren Cosker, Kwang In Kim", "author": "Youssef Alami Mejjati; Christian Richardt; James Tompkin; Darren Cosker; Kwang In Kim", "abstract": "Current unsupervised image-to-image translation techniques struggle to focus their attention on individual objects without altering the background or the way multiple objects interact within a scene. Motivated by the important role of attention in human perception, we tackle this limitation by introducing unsupervised attention mechanisms which are jointly adversarially trained with the generators and discriminators. We empirically demonstrate that our approach is able to attend to relevant regions in the image without requiring any additional supervision, and that by doing so it achieves more realistic mappings compared to recent approaches.", "bibtex": "@inproceedings{NEURIPS2018_4e87337f,\n author = {Alami Mejjati, Youssef and Richardt, Christian and Tompkin, James and Cosker, Darren and Kim, Kwang In},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Attention-guided Image-to-Image Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e87337f366f72daa424dae11df0538c-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e87337f366f72daa424dae11df0538c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e87337f366f72daa424dae11df0538c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e87337f366f72daa424dae11df0538c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e87337f366f72daa424dae11df0538c-Reviews.html", "metareview": "", "pdf_size": 2529587, "gs_citation": 408, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=912464851779595905&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Bath; University of Bath; Brown University; University of Bath; University of Bath", "aff_domain": "bath.ac.uk;richardt.name;brown.edu;bath.ac.uk;bath.ac.uk", "email": "bath.ac.uk;richardt.name;brown.edu;bath.ac.uk;bath.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e87337f366f72daa424dae11df0538c-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Bath;Brown University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bath.ac.uk;https://www.brown.edu", "aff_unique_abbr": "Bath;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Unsupervised Cross-Modal Alignment of Speech and Text Embedding Spaces", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11708", "id": "11708", "author_site": "Yu-An Chung, Wei-Hung Weng, Schrasing Tong, Jim Glass", "author": "Yu-An Chung; Wei-Hung Weng; Schrasing Tong; James Glass", "abstract": "Recent research has shown that word embedding spaces learned from text corpora of different languages can be aligned without any parallel data supervision. Inspired by the success in unsupervised cross-lingual word embeddings, in this paper we target learning a cross-modal alignment between the embedding spaces of speech and text learned from corpora of their respective modalities in an unsupervised fashion. The proposed framework learns the individual speech and text embedding spaces, and attempts to align the two spaces via adversarial training, followed by a refinement procedure. We show how our framework could be used to perform the tasks of spoken word classification and translation, and the experimental results on these two tasks demonstrate that the performance of our unsupervised alignment approach is comparable to its supervised counterpart. Our framework is especially useful for developing automatic speech recognition (ASR) and speech-to-text translation systems for low- or zero-resource languages, which have little parallel audio-text data for training modern supervised ASR and speech-to-text translation models, but account for the majority of the languages spoken across the world.", "bibtex": "@inproceedings{NEURIPS2018_1ea97de8,\n author = {Chung, Yu-An and Weng, Wei-Hung and Tong, Schrasing and Glass, James},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Cross-Modal Alignment of Speech and Text Embedding Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1ea97de85eb634d580161c603422437f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1ea97de85eb634d580161c603422437f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1ea97de85eb634d580161c603422437f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1ea97de85eb634d580161c603422437f-Reviews.html", "metareview": "", "pdf_size": 299057, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8077332226757167864&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1ea97de85eb634d580161c603422437f-Abstract.html" }, { "title": "Unsupervised Depth Estimation, 3D Face Rotation and Replacement", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11925", "id": "11925", "author_site": "Joel Ruben Antony Moniz, Christopher Beckham, Simon Rajotte, Sina Honari, Chris Pal", "author": "Joel Ruben Antony Moniz; Christopher Beckham; Simon Rajotte; Sina Honari; Chris Pal", "abstract": "We present an unsupervised approach for learning to estimate three dimensional (3D) facial structure from a single image while also predicting 3D viewpoint transformations that match a desired pose and facial geometry.\nWe achieve this by inferring the depth of facial keypoints of an input image in an unsupervised manner, without using any form of ground-truth depth information. We show how it is possible to use these depths as intermediate computations within a new backpropable loss to predict the parameters of a 3D affine transformation matrix that maps inferred 3D keypoints of an input face to the corresponding 2D keypoints on a desired target facial geometry or pose.\nOur resulting approach, called DepthNets, can therefore be used to infer plausible 3D transformations from one face pose to another, allowing faces to be frontalized, transformed into 3D models or even warped to another pose and facial geometry.\nLastly, we identify certain shortcomings with our formulation, and explore adversarial image translation techniques as a post-processing step to re-synthesize complete head shots for faces re-targeted to different poses or identities.", "bibtex": "@inproceedings{NEURIPS2018_9a1335ef,\n author = {Moniz, Joel Ruben Antony and Beckham, Christopher and Rajotte, Simon and Honari, Sina and Pal, Chris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Depth Estimation, 3D Face Rotation and Replacement},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/9a1335ef5ffebb0de9d089c4182e4868-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/9a1335ef5ffebb0de9d089c4182e4868-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/9a1335ef5ffebb0de9d089c4182e4868-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/9a1335ef5ffebb0de9d089c4182e4868-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/9a1335ef5ffebb0de9d089c4182e4868-Reviews.html", "metareview": "", "pdf_size": 2086211, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2371681385764042999&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Mila-University of Montreal+Polytechnique Montreal; Mila-University of Montreal+Polytechnique Montreal; Mila-University of Montreal; Mila-University of Montreal+Polytechnique Montreal+Element AI", "aff_domain": "andrew.cmu.edu; ; ;iro.umontreal.ca; ", "email": "andrew.cmu.edu; ; ;iro.umontreal.ca; ", "github": "https://github.com/joelmoniz/DepthNets", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/9a1335ef5ffebb0de9d089c4182e4868-Abstract.html", "aff_unique_index": "0;1+2;1+2;1;1+2+3", "aff_unique_norm": "Carnegie Mellon University;University of Montreal;Polytechnique Montreal;Element AI", "aff_unique_dep": ";Mila;;", "aff_unique_url": "https://www.cmu.edu;https://www.mila.quebec;https://www.polymtl.ca;https://www.elementai.com", "aff_unique_abbr": "CMU;Mila;PolyMTL;Element AI", "aff_campus_unique_index": "1+1;1+1;1;1+1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1+1;1+1;1;1+1+1", "aff_country_unique": "United States;Canada" }, { "title": "Unsupervised Image-to-Image Translation Using Domain-Specific Variational Information Bound", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11979", "id": "11979", "author_site": "Hadi Kazemi, Sobhan Soleymani, Fariborz Taherkhani, Seyed Iranmanesh, Nasser Nasrabadi", "author": "Hadi Kazemi; Sobhan Soleymani; Fariborz Taherkhani; Seyed Iranmanesh; Nasser Nasrabadi", "abstract": "Unsupervised image-to-image translation is a class of computer vision problems which aims at modeling conditional distribution of images in the target domain, given a set of unpaired images in the source and target domains. An image in the source domain might have multiple representations in the target domain. Therefore, ambiguity in modeling of the conditional distribution arises, especially when the images in the source and target domains come from different modalities. Current approaches mostly rely on simplifying assumptions to map both domains into a shared-latent space. Consequently, they are only able to model the domain-invariant information between the two modalities. These approaches cannot model domain-specific information which has no representation in the target domain. In this work, we propose an unsupervised image-to-image translation framework which maximizes a domain-specific variational information bound and learns the target domain-invariant representation of the two domains. 
The proposed framework makes it possible to map a single source image into multiple images in the target domain, utilizing several target domain-specific codes sampled randomly from the prior distribution, or extracted from reference images.", "bibtex": "@inproceedings{NEURIPS2018_c7c46d4b,\n author = {Kazemi, Hadi and Soleymani, Sobhan and Taherkhani, Fariborz and Iranmanesh, Seyed and Nasrabadi, Nasser},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Image-to-Image Translation Using Domain-Specific Variational Information Bound},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c7c46d4baf816bfb07c7f3bf96d88544-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c7c46d4baf816bfb07c7f3bf96d88544-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c7c46d4baf816bfb07c7f3bf96d88544-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c7c46d4baf816bfb07c7f3bf96d88544-Reviews.html", "metareview": "", "pdf_size": 4293064, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13739192126130402474&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "West Virginia University; West Virginia University; ; West Virginia University; West Virginia University", "aff_domain": "mix.wvu.edu;mix.wvu.edu;gmail.com;mix.wvu.edu;mail.wvu.edu", "email": "mix.wvu.edu;mix.wvu.edu;gmail.com;mix.wvu.edu;mail.wvu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c7c46d4baf816bfb07c7f3bf96d88544-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "West Virginia University", "aff_unique_dep": "", "aff_unique_url": "https://www.wvu.edu", "aff_unique_abbr": "WVU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Learning of Artistic Styles with Archetypal Style Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11636", "id": "11636", "author_site": "Daan Wynen, Cordelia Schmid, Julien Mairal", "author": "Daan Wynen; Cordelia Schmid; Julien Mairal", "abstract": "In this paper, we introduce an unsupervised learning approach to automatically discover, summarize, and manipulate artistic styles from large collections of paintings. Our method is based on archetypal analysis, which is an unsupervised learning technique akin to sparse coding with a geometric interpretation. When applied to deep image representations from a data collection, it learns a dictionary of archetypal styles, which can be easily visualized. After training the model, the style of a new image, which is characterized by local statistics of deep visual features, is approximated by a sparse convex combination of archetypes. This allows us to interpret which archetypal styles are present in the input image, and in which proportion. 
Finally, our approach allows us to manipulate the coefficients of the latent archetypal decomposition, and achieve various special effects such as style enhancement, transfer, and interpolation between multiple archetypes.", "bibtex": "@inproceedings{NEURIPS2018_09060616,\n author = {Wynen, Daan and Schmid, Cordelia and Mairal, Julien},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of Artistic Styles with Archetypal Style Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/09060616068d2b9544dc33f2fbe4ce2d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/09060616068d2b9544dc33f2fbe4ce2d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/09060616068d2b9544dc33f2fbe4ce2d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/09060616068d2b9544dc33f2fbe4ce2d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/09060616068d2b9544dc33f2fbe4ce2d-Reviews.html", "metareview": "", "pdf_size": 9998201, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5936984475360789600&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Univ. Grenoble Alpes, Inria, CNRS, Grenoble INP\u2217, LJK; Univ. Grenoble Alpes, Inria, CNRS, Grenoble INP\u2217, LJK; Univ. Grenoble Alpes, Inria, CNRS, Grenoble INP\u2217, LJK", "aff_domain": "inria.fr;inria.fr;inria.fr", "email": "inria.fr;inria.fr;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/09060616068d2b9544dc33f2fbe4ce2d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Universit\u00e9 Grenoble Alpes", "aff_unique_dep": "", "aff_unique_url": "https://www.univ-grenoble-alpes.fr", "aff_unique_abbr": "UGA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Grenoble", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Unsupervised Learning of Object Landmarks through Conditional Image Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11399", "id": "11399", "author_site": "Tomas Jakab, Ankush Gupta, Hakan Bilen, Andrea Vedaldi", "author": "Tomas Jakab; Ankush Gupta; Hakan Bilen; Andrea Vedaldi", "abstract": "We propose a method for learning landmark detectors for visual objects (such as the eyes and the nose in a face) without any manual supervision. We cast this as the problem of generating images that combine the appearance of the object as seen in a first example image with the geometry of the object as seen in a second example image, where the two examples differ by a viewpoint change and/or an object deformation. In order to factorize appearance and geometry, we introduce a tight bottleneck in the geometry-extraction process that selects and distils geometry-related features. Compared to standard image generation problems, which often use generative adversarial networks, our generation task is conditioned on both appearance and geometry and thus is significantly less ambiguous, to the point that adopting a simple perceptual loss formulation is sufficient. 
We demonstrate that our approach can learn object landmarks from synthetic image deformations or videos, all without manual supervision, while outperforming state-of-the-art unsupervised landmark detectors. We further show that our method is applicable to a large variety of datasets - faces, people, 3D objects, and digits - without any modifications.", "bibtex": "@inproceedings{NEURIPS2018_1f36c15d,\n author = {Jakab, Tomas and Gupta, Ankush and Bilen, Hakan and Vedaldi, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of Object Landmarks through Conditional Image Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1f36c15d6a3d18d52e8d493bc8187cb9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1f36c15d6a3d18d52e8d493bc8187cb9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1f36c15d6a3d18d52e8d493bc8187cb9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1f36c15d6a3d18d52e8d493bc8187cb9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1f36c15d6a3d18d52e8d493bc8187cb9-Reviews.html", "metareview": "", "pdf_size": 2063336, "gs_citation": 285, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11901841152417185118&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Visual Geometry Group, University of Oxford; Visual Geometry Group, University of Oxford; School of Informatics, University of Edinburgh; Visual Geometry Group, University of Oxford", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;ed.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk;ed.ac.uk;robots.ox.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1f36c15d6a3d18d52e8d493bc8187cb9-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Oxford;University of Edinburgh", "aff_unique_dep": "Visual Geometry Group;School of Informatics", "aff_unique_url": "https://www.ox.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "Oxford;Edinburgh", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Oxford;Edinburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Unsupervised Learning of Shape and Pose with Differentiable Point Clouds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11287", "id": "11287", "author_site": "Eldar Insafutdinov, Alexey Dosovitskiy", "author": "Eldar Insafutdinov; Alexey Dosovitskiy", "abstract": "We address the problem of learning accurate 3D shape and camera pose from a collection of unlabeled category-specific images. We train a convolutional network to predict both the shape and the pose from a single image by minimizing the reprojection error: given several views of an object, the projections of the predicted shapes to the predicted camera poses should match the provided views. To deal with pose ambiguity, we introduce an ensemble of pose predictors which we then distill to a single \"student\" model. To allow for efficient learning of high-fidelity shapes, we represent the shapes by point clouds and devise a formulation allowing for differentiable projection of these. 
Our experiments show that the distilled ensemble of pose predictors learns to estimate the pose accurately, while the point cloud representation allows predicting detailed shape models.", "bibtex": "@inproceedings{NEURIPS2018_4e8412ad,\n author = {Insafutdinov, Eldar and Dosovitskiy, Alexey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of Shape and Pose with Differentiable Point Clouds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4e8412ad48562e3c9934f45c3e144d48-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4e8412ad48562e3c9934f45c3e144d48-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4e8412ad48562e3c9934f45c3e144d48-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4e8412ad48562e3c9934f45c3e144d48-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4e8412ad48562e3c9934f45c3e144d48-Reviews.html", "metareview": "", "pdf_size": 4485439, "gs_citation": 276, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17863075261159636613&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Max Planck Institute for Informatics; Intel Labs", "aff_domain": "mpi-inf.mpg.de;gmail.com", "email": "mpi-inf.mpg.de;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4e8412ad48562e3c9934f45c3e144d48-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Max Planck Institute for Informatics;Intel", "aff_unique_dep": ";Intel Labs", "aff_unique_url": "https://mpi-inf.mpg.de;https://www.intel.com", "aff_unique_abbr": "MPII;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United States" }, { "title": "Unsupervised Learning of View-invariant Action Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11143", "id": "11143", "author_site": "Junnan Li, Yongkang Wong, Qi Zhao, Mohan Kankanhalli", "author": "Junnan Li; Yongkang Wong; Qi Zhao; Mohan S Kankanhalli", "abstract": "Recent successes in human action recognition with deep learning methods mostly adopt the supervised learning paradigm, which requires a significant amount of manually labeled data to achieve good performance. However, label collection is an expensive and time-consuming process. In this work, we propose an unsupervised learning framework, which exploits unlabeled data to learn video representations. Different from previous works in video representation learning, our unsupervised learning task is to predict 3D motion in multiple target views using video representation from a source view. By learning to extrapolate cross-view motions, the representation can capture view-invariant motion dynamics, which are discriminative for the action. In addition, we propose a view-adversarial training method to enhance learning of view-invariant features. 
We demonstrate the effectiveness of the learned representations for action recognition on multiple datasets.", "bibtex": "@inproceedings{NEURIPS2018_2f37d101,\n author = {Li, Junnan and Wong, Yongkang and Zhao, Qi and Kankanhalli, Mohan S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of View-invariant Action Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2f37d10131f2a483a8dd005b3d14b0d9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2f37d10131f2a483a8dd005b3d14b0d9-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2f37d10131f2a483a8dd005b3d14b0d9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2f37d10131f2a483a8dd005b3d14b0d9-Reviews.html", "metareview": "", "pdf_size": 1214292, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7214892390633870522&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Grad. School for Integrative Sciences and Engineering, National University of Singapore, Singapore; School of Computing, National University of Singapore, Singapore; Dept. of Computer Science and Engineering, University of Minnesota, Minneapolis, USA; School of Computing, National University of Singapore, Singapore", "aff_domain": "u.nus.edu;nus.edu.sg;cs.umn.edu;comp.nus.edu.sg", "email": "u.nus.edu;nus.edu.sg;cs.umn.edu;comp.nus.edu.sg", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2f37d10131f2a483a8dd005b3d14b0d9-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;University of Minnesota", "aff_unique_dep": "Graduate School for Integrative Sciences and Engineering;Department of Computer Science and Engineering", "aff_unique_url": "https://www.nus.edu.sg;https://www.umn.edu", "aff_unique_abbr": "NUS;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "title": "Unsupervised Text Style Transfer using Language Models as Discriminators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11702", "id": "11702", "author_site": "Zichao Yang, Zhiting Hu, Chris Dyer, Eric Xing, Taylor Berg-Kirkpatrick", "author": "Zichao Yang; Zhiting Hu; Chris Dyer; Eric P Xing; Taylor Berg-Kirkpatrick", "abstract": "Binary classifiers are employed as discriminators in GAN-based unsupervised style transfer models to ensure that transferred sentences are similar to sentences in the target domain. One difficulty with the binary discriminator is that error signal is sometimes insufficient to train the model to produce rich-structured language. In this paper, we propose a technique of using a target domain language model as the discriminator to provide richer, token-level feedback during the learning process. Because our language model scores sentences directly using a product of locally normalized probabilities, it offers more stable and more useful training signal to the generator. We train the generator to minimize the negative log likelihood (NLL) of generated sentences evaluated by a language model. 
By using a continuous approximation of the discrete samples, our model can be trained using back-propagation in an end-to-end way. Moreover, we find empirically that, with a language model as a structured discriminator, it is possible to eliminate the adversarial training steps using negative samples, thus making training more stable. We compare our model with previous work using convolutional neural networks (CNNs) as discriminators and show that our model outperforms them significantly in three tasks including word substitution decipherment, sentiment modification, and related language translation.", "bibtex": "@inproceedings{NEURIPS2018_398475c8,\n author = {Yang, Zichao and Hu, Zhiting and Dyer, Chris and Xing, Eric P and Berg-Kirkpatrick, Taylor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Text Style Transfer using Language Models as Discriminators},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/398475c83b47075e8897a083e97eb9f0-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/398475c83b47075e8897a083e97eb9f0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/398475c83b47075e8897a083e97eb9f0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/398475c83b47075e8897a083e97eb9f0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/398475c83b47075e8897a083e97eb9f0-Reviews.html", "metareview": "", "pdf_size": 237128, "gs_citation": 323, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=488841870167588176&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Carnegie Mellon University; DeepMind; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;google.com;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;google.com;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/398475c83b47075e8897a083e97eb9f0-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;DeepMind", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://deepmind.com", "aff_unique_abbr": "CMU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Unsupervised Video Object Segmentation for Deep Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11554", "id": "11554", "author_site": "Vikash Goel, Jameson Weng, Pascal Poupart", "author": "Vikash Goel; Jameson Weng; Pascal Poupart", "abstract": "We present a new technique for deep reinforcement learning that automatically detects moving objects and uses the relevant information for action selection. The detection of moving objects is done in an unsupervised way by exploiting structure from motion. Instead of directly learning a policy from raw images, the agent first learns to detect and segment moving objects by exploiting flow information in video sequences. The learned representation is then used to focus the policy of the agent on the moving objects. 
Over time, the agent identifies which objects are critical for decision making and gradually builds a policy based on relevant moving objects. This approach, which we call Motion-Oriented REinforcement Learning (MOREL), is demonstrated on a suite of Atari games where the ability to detect moving objects reduces the amount of interaction needed with the environment to obtain a good policy. Furthermore, the resulting policy is more interpretable than policies that directly map images to actions or values with a black box neural network. We can gain insight into the policy by inspecting the segmentation and motion of each object detected by the agent. This allows practitioners to confirm whether a policy is making decisions based on sensible information. Our code is available at https://github.com/vik-goel/MOREL.", "bibtex": "@inproceedings{NEURIPS2018_96f2b50b,\n author = {Goel, Vikash and Weng, Jameson and Poupart, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Video Object Segmentation for Deep Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/96f2b50b5d3613adf9c27049b2a888c7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/96f2b50b5d3613adf9c27049b2a888c7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/96f2b50b5d3613adf9c27049b2a888c7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/96f2b50b5d3613adf9c27049b2a888c7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/96f2b50b5d3613adf9c27049b2a888c7-Reviews.html", "metareview": "", "pdf_size": 861386, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6319633307497311746&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Cheriton School of Computer Science, Waterloo AI Institute, University of Waterloo, Canada; Vector Institute, Toronto, Canada; Cheriton School of Computer Science, Waterloo AI Institute, University of Waterloo, Canada", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "github": "https://github.com/vik-goel/MOREL", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/96f2b50b5d3613adf9c27049b2a888c7-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Waterloo;Vector Institute", "aff_unique_dep": "Cheriton School of Computer Science;", "aff_unique_url": "https://uwaterloo.ca;https://vectorinstitute.ai", "aff_unique_abbr": "UW;Vector Institute", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Waterloo;Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Uplift Modeling from Separate Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11942", "id": "11942", "author_site": "Ikko Yamane, Florian Yger, Jamal Atif, Masashi Sugiyama", "author": "Ikko Yamane; Florian Yger; Jamal Atif; Masashi Sugiyama", "abstract": "Uplift modeling is aimed at estimating the incremental impact of an action on an individual's behavior, which is useful in various application domains such as targeted marketing (advertisement campaigns) and personalized medicine (medical treatments). 
Conventional methods of uplift modeling require every instance to be jointly equipped with two types of labels: the action taken and its outcome. However, obtaining two labels for each instance at the same time is difficult or expensive in many real-world problems. In this paper, we propose a novel method of uplift modeling that is applicable to a more practical setting where only one type of label is available for each instance. We show a mean squared error bound for the proposed estimator and demonstrate its effectiveness through experiments.", "bibtex": "@inproceedings{NEURIPS2018_198dd5fb,\n author = {Yamane, Ikko and Yger, Florian and Atif, Jamal and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Uplift Modeling from Separate Labels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/198dd5fb9c43b2d29a548f8c77e85cf9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/198dd5fb9c43b2d29a548f8c77e85cf9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/198dd5fb9c43b2d29a548f8c77e85cf9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/198dd5fb9c43b2d29a548f8c77e85cf9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/198dd5fb9c43b2d29a548f8c77e85cf9-Reviews.html", "metareview": "", "pdf_size": 2398430, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3966186419955676396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "The University of Tokyo + RIKEN Center for Advanced Intelligence Project (AIP); RIKEN Center for Advanced Intelligence Project (AIP) + LAMSADE, CNRS, Universit\u00e9 Paris-Dauphine, Universit\u00e9 PSL; LAMSADE, CNRS, Universit\u00e9 Paris-Dauphine, Universit\u00e9 PSL; The University of Tokyo + RIKEN Center for Advanced Intelligence Project (AIP)", "aff_domain": "ms.k.u-tokyo.ac.jp;dauphine.fr;dauphine.fr;k.u-tokyo.ac.jp", "email": "ms.k.u-tokyo.ac.jp;dauphine.fr;dauphine.fr;k.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/198dd5fb9c43b2d29a548f8c77e85cf9-Abstract.html", "aff_unique_index": "0+1;1+2;2;0+1", "aff_unique_norm": "University of Tokyo;RIKEN;Universit\u00e9 Paris-Dauphine", "aff_unique_dep": ";Center for Advanced Intelligence Project (AIP);LAMSADE", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.riken.jp/en/;https://www.univ-paris-dauphine.fr", "aff_unique_abbr": "UTokyo;RIKEN;UPD", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+1;1;0+0", "aff_country_unique": "Japan;France" }, { "title": "Using Large Ensembles of Control Variates for Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11945", "id": "11945", "author_site": "Tomas Geffner, Justin Domke", "author": "Tomas Geffner; Justin Domke", "abstract": "Variational inference is increasingly being addressed with stochastic optimization. In this setting, the gradient's variance plays a crucial role in the optimization procedure, since high-variance gradients lead to poor convergence. A popular approach used to reduce the gradient's variance involves the use of control variates.
Despite the good results obtained, control variates developed for variational inference are typically looked at in isolation. In this paper we clarify the large number of control variates that are available by giving a systematic view of how they are derived. We also present a Bayesian risk minimization framework in which the quality of a procedure for combining control variates is quantified by its effect on optimization convergence rates, which leads to a very simple combination rule. Results show that combining a large number of control variates this way significantly improves the convergence of inference over using the typical gradient estimators or a reduced number of control variates.", "bibtex": "@inproceedings{NEURIPS2018_dead35fa,\n author = {Geffner, Tomas and Domke, Justin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Using Large Ensembles of Control Variates for Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/dead35fa1512ad67301d09326177c42f-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/dead35fa1512ad67301d09326177c42f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/dead35fa1512ad67301d09326177c42f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/dead35fa1512ad67301d09326177c42f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/dead35fa1512ad67301d09326177c42f-Reviews.html", "metareview": "", "pdf_size": 771477, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18277264841590676267&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "College of Information and Computer Science, University of Massachusetts; College of Information and Computer Science, University of Massachusetts", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/dead35fa1512ad67301d09326177c42f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts", "aff_unique_dep": "College of Information and Computer Science", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Using Trusted Data to Train Deep Networks on Labels Corrupted by Severe Noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11989", "id": "11989", "author_site": "Dan Hendrycks, Mantas Mazeika, Duncan Wilson, Kevin Gimpel", "author": "Dan Hendrycks; Mantas Mazeika; Duncan Wilson; Kevin Gimpel", "abstract": "The growing importance of massive datasets with the advent of deep learning makes robustness to label noise a critical property for classifiers to have. Sources of label noise include automatic labeling for large datasets, non-expert labeling, and label corruption by data poisoning adversaries. In the latter case, corruptions may be arbitrarily bad, even so bad that a classifier predicts the wrong labels with high confidence. To protect against such sources of noise, we leverage the fact that a small set of clean labels is often easy to procure. 
We demonstrate that robustness to label noise up to severe strengths can be achieved by using a set of trusted data with clean labels, and propose a loss correction that utilizes trusted examples in a data-efficient manner to mitigate the effects of label noise on deep neural network classifiers. Across vision and natural language processing tasks, we experiment with various label noises at several strengths, and show that our method significantly outperforms existing methods.", "bibtex": "@inproceedings{NEURIPS2018_ad554d8c,\n author = {Hendrycks, Dan and Mazeika, Mantas and Wilson, Duncan and Gimpel, Kevin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Using Trusted Data to Train Deep Networks on Labels Corrupted by Severe Noise},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ad554d8c3b06d6b97ee76a2448bd7913-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ad554d8c3b06d6b97ee76a2448bd7913-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ad554d8c3b06d6b97ee76a2448bd7913-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ad554d8c3b06d6b97ee76a2448bd7913-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ad554d8c3b06d6b97ee76a2448bd7913-Reviews.html", "metareview": "", "pdf_size": 378095, "gs_citation": 705, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3616817429291706463&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of California, Berkeley; University of Chicago; Foundational Research Institute; Toyota Technological Institute at Chicago", "aff_domain": "berkeley.edu;ttic.edu;nevada.unr.edu;ttic.edu", "email": "berkeley.edu;ttic.edu;nevada.unr.edu;ttic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ad554d8c3b06d6b97ee76a2448bd7913-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of California, Berkeley;University of Chicago;Foundational Research Institute;Toyota Technological Institute at Chicago", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.uchicago.edu;https://fhi.razorpay.com/;https://www.tti-chicago.org", "aff_unique_abbr": "UC Berkeley;UChicago;FRI;TTI Chicago", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variance-Reduced Stochastic Gradient Descent on Streaming Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11940", "id": "11940", "author_site": "Ellango Jothimurugesan, Ashraf Tahmasbi, Phillip Gibbons, Srikanta Tirthapura", "author": "Ellango Jothimurugesan; Ashraf Tahmasbi; Phillip Gibbons; Srikanta Tirthapura", "abstract": "We present an algorithm STRSAGA for efficiently maintaining a machine learning model over data points that arrive over time, quickly updating the model as new training data is observed. We present a competitive analysis comparing the sub-optimality of the model maintained by STRSAGA with that of an offline algorithm that is given the entire data beforehand, and analyze the risk-competitiveness of STRSAGA under different arrival patterns. 
Our theoretical and experimental results show that the risk of STRSAGA is comparable to that of offline algorithms on a variety of input arrival patterns, and its experimental performance is significantly better than prior algorithms suited for streaming data, such as SGD and SSVRG.", "bibtex": "@inproceedings{NEURIPS2018_cebd648f,\n author = {Jothimurugesan, Ellango and Tahmasbi, Ashraf and Gibbons, Phillip and Tirthapura, Srikanta},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variance-Reduced Stochastic Gradient Descent on Streaming Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/cebd648f9146a6345d604ab093b02c73-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/cebd648f9146a6345d604ab093b02c73-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/cebd648f9146a6345d604ab093b02c73-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/cebd648f9146a6345d604ab093b02c73-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/cebd648f9146a6345d604ab093b02c73-Reviews.html", "metareview": "", "pdf_size": 526385, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5063353326382764733&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Carnegie Mellon University; Iowa State University; Carnegie Mellon University; Iowa State University", "aff_domain": "cs.cmu.edu;iastate.edu;cs.cmu.edu;iastate.edu", "email": "cs.cmu.edu;iastate.edu;cs.cmu.edu;iastate.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/cebd648f9146a6345d604ab093b02c73-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Iowa State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.iastate.edu", "aff_unique_abbr": "CMU;ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Bayesian Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11786", "id": "11786", "author": "Luigi Acerbi", "abstract": "Many probabilistic models of interest in scientific computing and machine learning have expensive, black-box likelihoods that prevent the application of standard techniques for Bayesian inference, such as MCMC, which would require access to the gradient or a large number of likelihood evaluations.\nWe introduce here a novel sample-efficient inference framework, Variational Bayesian Monte Carlo (VBMC). VBMC combines variational inference with Gaussian-process based, active-sampling Bayesian quadrature, using the latter to efficiently approximate the intractable integral in the variational objective.\nOur method produces both a nonparametric approximation of the posterior distribution and an approximate lower bound of the model evidence, useful for model selection.\nWe demonstrate VBMC both on several synthetic likelihoods and on a neuronal model with data from real neurons. 
Across all tested problems and dimensions (up to D = 10), VBMC performs consistently well in reconstructing the posterior and the model evidence with a limited budget of likelihood evaluations, unlike other methods that work only in very low dimensions. Our framework shows great promise as a novel tool for posterior and model inference with expensive, black-box likelihoods.", "bibtex": "@inproceedings{NEURIPS2018_747c1bcc,\n author = {Acerbi, Luigi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Bayesian Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/747c1bcceb6109a4ef936bc70cfe67de-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/747c1bcceb6109a4ef936bc70cfe67de-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/747c1bcceb6109a4ef936bc70cfe67de-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/747c1bcceb6109a4ef936bc70cfe67de-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/747c1bcceb6109a4ef936bc70cfe67de-Reviews.html", "metareview": "", "pdf_size": 1074333, "gs_citation": 96, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1503005530193586627&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/747c1bcceb6109a4ef936bc70cfe67de-Abstract.html" }, { "title": "Variational Inference with Tail-adaptive f-Divergence", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11559", "id": "11559", "author_site": "Dilin Wang, Hao Liu, Qiang Liu", "author": "Dilin Wang; Hao Liu; Qiang Liu", "abstract": "Variational inference with \u03b1-divergences has been widely used in modern probabilistic machine learning. Compared to Kullback-Leibler (KL) divergence, a major advantage of using \u03b1-divergences (with positive \u03b1 values) is their mass-covering property. However, estimating and optimizing \u03b1-divergences require the use of importance sampling, which could have extremely large or infinite variances due to heavy tails of importance weights. In this paper, we propose a new class of tail-adaptive f-divergences that adaptively change the convex function f with the tail of the importance weights, in a way that theoretically guarantees finite moments while simultaneously achieving mass-covering properties. We test our methods on Bayesian neural networks, as well as deep reinforcement learning, in which our method is applied to improve a recent soft actor-critic (SAC) algorithm (Haarnoja et al., 2018). Our results show that our approach yields significant advantages compared with existing methods based on classical KL and \u03b1-divergences.", "bibtex": "@inproceedings{NEURIPS2018_1cd138d0,\n author = {Wang, Dilin and Liu, Hao and Liu, Qiang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Inference with Tail-adaptive f-Divergence},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1cd138d0499a68f4bb72bee04bbec2d7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1cd138d0499a68f4bb72bee04bbec2d7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1cd138d0499a68f4bb72bee04bbec2d7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1cd138d0499a68f4bb72bee04bbec2d7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1cd138d0499a68f4bb72bee04bbec2d7-Reviews.html", "metareview": "", "pdf_size": 4113272, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1588246766149700607&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "UT Austin; UESTC + UT Austin; UT Austin", "aff_domain": "cs.utexas.edu;gmail.com;cs.utexas.edu", "email": "cs.utexas.edu;gmail.com;cs.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1cd138d0499a68f4bb72bee04bbec2d7-Abstract.html", "aff_unique_index": "0;1+0;0", "aff_unique_norm": "University of Texas at Austin;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.uestc.edu.cn", "aff_unique_abbr": "UT Austin;UESTC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1+0;0", "aff_country_unique": "United States;China" }, { "title": "Variational Inverse Control with Events: A General Framework for Data-Driven Reward Definition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11816", "id": "11816", "author_site": "Justin Fu, Avi Singh, Dibya Ghosh, Larry Yang, Sergey Levine", "author": "Justin Fu; Avi Singh; Dibya Ghosh; Larry Yang; Sergey Levine", "abstract": "The design of a reward function often poses a major practical challenge to real-world applications of reinforcement learning. Approaches such as inverse reinforcement learning attempt to overcome this challenge, but require expert demonstrations, which can be difficult or expensive to obtain in practice. We propose inverse event-based control, which generalizes inverse reinforcement learning methods to cases where full demonstrations are not needed, such as when only samples of desired goal states are available. Our method is grounded in an alternative perspective on control and reinforcement learning, where an agent's goal is to maximize the probability that one or more events will happen at some point in the future, rather than maximizing cumulative rewards. We demonstrate the effectiveness of our methods on continuous control tasks, with a focus on high-dimensional observations like images where rewards are hard or even impossible to specify.", "bibtex": "@inproceedings{NEURIPS2018_c9319967,\n author = {Fu, Justin and Singh, Avi and Ghosh, Dibya and Yang, Larry and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Inverse Control with Events: A General Framework for Data-Driven Reward Definition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/c9319967c038f9b923068dabdf60cfe3-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/c9319967c038f9b923068dabdf60cfe3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/c9319967c038f9b923068dabdf60cfe3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/c9319967c038f9b923068dabdf60cfe3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/c9319967c038f9b923068dabdf60cfe3-Reviews.html", "metareview": "", "pdf_size": 551338, "gs_citation": 155, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3389041385351985283&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/c9319967c038f9b923068dabdf60cfe3-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Learning on Aggregate Outputs with Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11590", "id": "11590", "author_site": "Ho Chung Law, Dino Sejdinovic, Ewan Cameron, Tim Lucas, Seth Flaxman, Katherine Battle, Kenji Fukumizu", "author": "Ho Chung Law; Dino Sejdinovic; Ewan Cameron; Tim Lucas; Seth Flaxman; Katherine Battle; Kenji Fukumizu", "abstract": "While a typical supervised learning framework assumes that the inputs and the outputs are measured at the same levels of granularity, many applications, including global mapping of disease, only have access to outputs at a much coarser level than that of the inputs. Aggregation of outputs makes generalization to new inputs much more difficult. We consider an approach to this problem based on variational learning with a model of output aggregation and Gaussian processes, where aggregation leads to intractability of the standard evidence lower bounds. We propose new bounds and tractable approximations, leading to improved prediction accuracy and scalability to large datasets, while explicitly taking uncertainty into account. We develop a framework which extends to several types of likelihoods, including the Poisson model for aggregated count data. We apply our framework to a challenging and important problem, the fine-scale spatial modelling of malaria incidence, with over 1 million observations.", "bibtex": "@inproceedings{NEURIPS2018_24b43fb0,\n author = {Law, Ho Chung and Sejdinovic, Dino and Cameron, Ewan and Lucas, Tim and Flaxman, Seth and Battle, Katherine and Fukumizu, Kenji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. 
Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Learning on Aggregate Outputs with Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/24b43fb034a10d78bec71274033b4096-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/24b43fb034a10d78bec71274033b4096-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/24b43fb034a10d78bec71274033b4096-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/24b43fb034a10d78bec71274033b4096-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/24b43fb034a10d78bec71274033b4096-Reviews.html", "metareview": "", "pdf_size": 3294497, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13818534818936105660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Oxford; University of Oxford+Alan Turing Institute; University of Oxford; University of Oxford; Imperial College London; University Of Oxford; Institute of Statistical Mathematics", "aff_domain": "stats.ox.ac.uk;stats.ox.ac.uk;gmail.com;gmail.com;imperial.ac.uk;bdi.ox.ac.uk;ism.ac.jp", "email": "stats.ox.ac.uk;stats.ox.ac.uk;gmail.com;gmail.com;imperial.ac.uk;bdi.ox.ac.uk;ism.ac.jp", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/24b43fb034a10d78bec71274033b4096-Abstract.html", "aff_unique_index": "0;0+1;0;0;2;0;3", "aff_unique_norm": "University of Oxford;Alan Turing Institute;Imperial College London;Institute of Statistical Mathematics", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.turing.ac.uk;https://www.imperial.ac.uk;https://www.ism.ac.jp", "aff_unique_abbr": "Oxford;ATI;ICL;ISM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0;0;0;0;1", "aff_country_unique": "United Kingdom;Japan" }, { "title": "Variational Memory Encoder-Decoder", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11166", "id": "11166", "author_site": "Hung Le, Truyen Tran, Thin Nguyen, Svetha Venkatesh", "author": "Hung Le; Truyen Tran; Thin Nguyen; Svetha Venkatesh", "abstract": "Introducing variability while maintaining coherence is a core task in learning to generate utterances in conversation. Standard neural encoder-decoder models and their extensions using conditional variational autoencoder often result in either trivial or digressive responses. To overcome this, we explore a novel approach that injects variability into neural encoder-decoder via the use of external memory as a mixture model, namely Variational Memory Encoder-Decoder (VMED). By associating each memory read with a mode in the latent mixture distribution at each timestep, our model can capture the variability observed in sequential data such as natural conversations. We empirically compare the proposed model against other recent approaches on various conversational datasets. The results show that VMED consistently achieves significant improvement over others in both metric-based and qualitative evaluations.", "bibtex": "@inproceedings{NEURIPS2018_e57c6b95,\n author = {Le, Hung and Tran, Truyen and Nguyen, Thin and Venkatesh, Svetha},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Memory Encoder-Decoder},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e57c6b956a6521b28495f2886ca0977a-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e57c6b956a6521b28495f2886ca0977a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/e57c6b956a6521b28495f2886ca0977a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e57c6b956a6521b28495f2886ca0977a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e57c6b956a6521b28495f2886ca0977a-Reviews.html", "metareview": "", "pdf_size": 438938, "gs_citation": 43, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16470131384989674730&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Applied AI Institute, Deakin University, Geelong, Australia; Applied AI Institute, Deakin University, Geelong, Australia; Applied AI Institute, Deakin University, Geelong, Australia; Applied AI Institute, Deakin University, Geelong, Australia", "aff_domain": "deakin.edu.au;deakin.edu.au;deakin.edu.au;deakin.edu.au", "email": "deakin.edu.au;deakin.edu.au;deakin.edu.au;deakin.edu.au", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e57c6b956a6521b28495f2886ca0977a-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Deakin University", "aff_unique_dep": "Applied AI Institute", "aff_unique_url": "https://www.deakin.edu.au", "aff_unique_abbr": "", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Geelong", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Variational PDEs for Acceleration on Manifolds and Application to Diffeomorphisms", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11378", "id": "11378", "author_site": "Ganesh Sundaramoorthi, Anthony Yezzi", "author": "Ganesh Sundaramoorthi; Anthony Yezzi", "abstract": "We consider the optimization of cost functionals on manifolds and derive a variational approach to accelerated methods on manifolds. We demonstrate the methodology on the infinite-dimensional manifold of diffeomorphisms, motivated by registration problems in computer vision. We build on the variational approach to accelerated optimization by Wibisono, Wilson and Jordan, which applies in finite dimensions, and generalize that approach to infinite dimensional manifolds. We derive the continuum evolution equations, which are partial differential equations (PDE), and relate them to simple mechanical principles. Our approach can also be viewed as a generalization of the $L^2$ optimal mass transport problem. Our approach evolves an infinite number of particles endowed with mass, represented as a mass density. The density evolves with the optimization variable, and endows the particles with dynamics. This is different than current accelerated methods where only a single particle moves and hence the dynamics does not depend on the mass. We derive the theory, compute the PDEs for acceleration, and illustrate the behavior of this new accelerated optimization scheme.", "bibtex": "@inproceedings{NEURIPS2018_68148596,\n author = {Sundaramoorthi, Ganesh and Yezzi, Anthony},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational PDEs for Acceleration on Manifolds and Application to Diffeomorphisms},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/68148596109e38cf9367d27875e185be-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/68148596109e38cf9367d27875e185be-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/68148596109e38cf9367d27875e185be-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/68148596109e38cf9367d27875e185be-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/68148596109e38cf9367d27875e185be-Reviews.html", "metareview": "", "pdf_size": 460880, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17317762122929131233&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "United Technologies Research Center, East Hartford, CT 06118; School of Electrical & Computer Engineering, Georgia Institute of Technology, Atlanta, GA 30332", "aff_domain": "utrc.utc.com;ece.gatech.edu", "email": "utrc.utc.com;ece.gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/68148596109e38cf9367d27875e185be-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "United Technologies Research Center;Georgia Institute of Technology", "aff_unique_dep": ";School of Electrical & Computer Engineering", "aff_unique_url": "https://www.utex.com/research;https://www.gatech.edu", "aff_unique_abbr": "UTRC;Georgia Tech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "East Hartford;Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Verifiable Reinforcement Learning via Policy Extraction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11258", "id": "11258", "author_site": "Osbert Bastani, Yewen Pu, Armando Solar-Lezama", "author": "Osbert Bastani; Yewen Pu; Armando Solar-Lezama", "abstract": "While deep reinforcement learning has successfully solved many challenging control tasks, its real-world applicability has been limited by the inability to ensure the safety of learned policies. We propose an approach to verifiable reinforcement learning by training decision tree policies, which can represent complex policies (since they are nonparametric), yet can be efficiently verified using existing techniques (since they are highly structured). The challenge is that decision tree policies are difficult to train. We propose VIPER, an algorithm that combines ideas from model compression and imitation learning to learn decision tree policies guided by a DNN policy (called the oracle) and its Q-function, and show that it substantially outperforms two baselines. We use VIPER to (i) learn a provably robust decision tree policy for a variant of Atari Pong with a symbolic state space, (ii) learn a decision tree policy for a toy game based on Pong that provably never loses, and (iii) learn a provably stable decision tree policy for cart-pole. In each case, the decision tree policy achieves performance equal to that of the original DNN policy.", "bibtex": "@inproceedings{NEURIPS2018_e6d8545d,\n author = {Bastani, Osbert and Pu, Yewen and Solar-Lezama, Armando},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. 
Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Verifiable Reinforcement Learning via Policy Extraction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/e6d8545daa42d5ced125a4bf747b3688-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/e6d8545daa42d5ced125a4bf747b3688-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/e6d8545daa42d5ced125a4bf747b3688-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/e6d8545daa42d5ced125a4bf747b3688-Reviews.html", "metareview": "", "pdf_size": 911184, "gs_citation": 455, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2373891892945892860&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "MIT; MIT; MIT", "aff_domain": "csail.mit.edu;mit.edu;csail.mit.edu", "email": "csail.mit.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/e6d8545daa42d5ced125a4bf747b3688-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Video Prediction via Selective Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11184", "id": "11184", "author_site": "Jingwei Xu, Bingbing Ni, Xiaokang Yang", "author": "Jingwei Xu; Bingbing Ni; Xiaokang Yang", "abstract": "Most adversarial-learning-based video prediction methods suffer from image blur, since the commonly used adversarial and regression losses work in a competitive rather than collaborative way, yielding a compromised, blurry result. \n Meanwhile, since it often relies on a single-pass architecture, the predictor cannot explicitly capture the forthcoming uncertainty.\n Our work builds on two key insights:\n (1) Video prediction can be approached as a stochastic process: we sample a collection of proposals conforming to the possible frame distribution at the following time stamp, and the final prediction can be selected from it.\n (2) Decoupling the combined loss functions into dedicatedly designed sub-networks encourages them to work in a collaborative way.\n Combining these two insights, we propose a two-stage network called VPSS (\\textbf{V}ideo \\textbf{P}rediction via \\textbf{S}elective \\textbf{S}ampling).", "bibtex": "@inproceedings{NEURIPS2018_ede7e2b6,\n author = {Xu, Jingwei and Ni, Bingbing and Yang, Xiaokang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Video Prediction via Selective Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ede7e2b6d13a41ddf9f4bdef84fdc737-Reviews.html", "metareview": "", "pdf_size": 1533031, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11532978294731885329&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "MoE Key Lab of Artificial Intelligence, AI Institute, SJTU-UCLA Joint Research Center on Machine Perception and Inference, Shanghai Jiao Tong University, Shanghai 200240, China; MoE Key Lab of Artificial Intelligence, AI Institute, SJTU-UCLA Joint Research Center on Machine Perception and Inference, Shanghai Jiao Tong University, Shanghai 200240, China; Shanghai Institute for Advanced Communication and Data Science", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "email": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ede7e2b6d13a41ddf9f4bdef84fdc737-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Shanghai Institute for Advanced Communication and Data Science", "aff_unique_dep": "MoE Key Lab of Artificial Intelligence;", "aff_unique_url": "https://www.sjtu.edu.cn;", "aff_unique_abbr": "SJTU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shanghai;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Video-to-Video Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11133", "id": "11133", "author_site": "Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu, Guilin Liu, Andrew Tao, Jan Kautz, Bryan Catanzaro", "author": "Ting-Chun Wang; Ming-Yu Liu; Jun-Yan Zhu; Guilin Liu; Andrew Tao; Jan Kautz; Bryan Catanzaro", "abstract": "We study the problem of video-to-video synthesis, whose goal is to learn a mapping function from an input source video (e.g., a sequence of semantic segmentation masks) to an output photorealistic video that precisely depicts the content of the source video. While its image counterpart, the image-to-image translation problem, is a popular topic, the video-to-video synthesis problem is less explored in the literature. Without modeling temporal dynamics, directly applying existing image synthesis approaches to an input video often results in temporally incoherent videos of low visual quality. In this paper, we propose a video-to-video synthesis approach under the generative adversarial learning framework. Through carefully-designed generators and discriminators, coupled with a spatio-temporal adversarial objective, we achieve high-resolution, photorealistic, temporally coherent video results on a diverse set of input formats including segmentation masks, sketches, and poses. Experiments on multiple benchmarks show the advantage of our method compared to strong baselines.
In particular, our model is capable of synthesizing 2K resolution videos of street scenes up to 30 seconds long, which significantly advances the state-of-the-art of video synthesis. Finally, we apply our method to future video prediction, outperforming several competing systems. Code, models, and more results are available at our website: https://github.com/NVIDIA/vid2vid. (Please use Adobe Reader to see the embedded videos in the paper.)", "bibtex": "@inproceedings{NEURIPS2018_d86ea612,\n author = {Wang, Ting-Chun and Liu, Ming-Yu and Zhu, Jun-Yan and Liu, Guilin and Tao, Andrew and Kautz, Jan and Catanzaro, Bryan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Video-to-Video Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d86ea612dec96096c5e0fcc8dd42ab6d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d86ea612dec96096c5e0fcc8dd42ab6d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/d86ea612dec96096c5e0fcc8dd42ab6d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d86ea612dec96096c5e0fcc8dd42ab6d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d86ea612dec96096c5e0fcc8dd42ab6d-Reviews.html", "metareview": "", "pdf_size": 90720073, "gs_citation": 1308, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3120460092236365926&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "NVIDIA; NVIDIA; MIT CSAIL; NVIDIA; NVIDIA; NVIDIA; NVIDIA", "aff_domain": "nvidia.com;nvidia.com;mit.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "email": "nvidia.com;nvidia.com;mit.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d86ea612dec96096c5e0fcc8dd42ab6d-Abstract.html", "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "NVIDIA;Massachusetts Institute of Technology", "aff_unique_dep": "NVIDIA Corporation;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.nvidia.com;https://www.csail.mit.edu", "aff_unique_abbr": "NVIDIA;MIT CSAIL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "VideoCapsuleNet: A Simplified Network for Action Detection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11731", "id": "11731", "author_site": "Kevin Duarte, Yogesh Rawat, Mubarak Shah", "author": "Kevin Duarte; Yogesh Rawat; Mubarak Shah", "abstract": "The recent advances in Deep Convolutional Neural Networks (DCNNs) have shown extremely good results for video human action classification, however, action detection is still a challenging problem. The current action detection approaches follow a complex pipeline which involves multiple tasks such as tube proposals, optical flow, and tube classification. In this work, we present a more elegant solution for action detection based on the recently developed capsule network. We propose a 3D capsule network for videos, called VideoCapsuleNet: a unified network for action detection which can jointly perform pixel-wise action segmentation along with action classification. 
The proposed network is a generalization of the capsule network from 2D to 3D, which takes a sequence of video frames as input. The 3D generalization drastically increases the number of capsules in the network, making capsule routing computationally expensive. We introduce capsule-pooling in the convolutional capsule layer to address this issue and make the voting algorithm tractable. The routing-by-agreement in the network inherently models the action representations, and various action characteristics are captured by the predicted capsules. This inspired us to utilize the capsules for action localization: the class-specific capsules predicted by the network are used to determine a pixel-wise localization of actions. The localization is further improved by parameterized skip connections with the convolutional capsule layers, and the network is trained end-to-end with a classification as well as a localization loss. The proposed network achieves state-of-the-art performance on multiple action detection datasets including UCF-Sports, J-HMDB, and UCF-101 (24 classes) with an impressive ~20% improvement on UCF-101 and ~15% improvement on J-HMDB in terms of v-mAP scores.", "bibtex": "@inproceedings{NEURIPS2018_73f104c9,\n author = {Duarte, Kevin and Rawat, Yogesh and Shah, Mubarak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {VideoCapsuleNet: A Simplified Network for Action Detection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/73f104c9fba50050eea11d9d075247cc-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/73f104c9fba50050eea11d9d075247cc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/73f104c9fba50050eea11d9d075247cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/73f104c9fba50050eea11d9d075247cc-Reviews.html", "metareview": "", "pdf_size": 2578101, "gs_citation": 234, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3192977579896330073&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Center for Research in Computer Vision; Center for Research in Computer Vision; Center for Research in Computer Vision", "aff_domain": "knights.ucf.edu;crcv.ucf.edu;crcv.ucf.edu", "email": "knights.ucf.edu;crcv.ucf.edu;crcv.ucf.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/73f104c9fba50050eea11d9d075247cc-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Center for Research in Computer Vision", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Virtual Class Enhanced Discriminative Embedding Learning", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11206", "id": "11206", "author_site": "Binghui Chen, Weihong Deng, Haifeng Shen", "author": "Binghui Chen; Weihong Deng; Haifeng Shen", "abstract": "Recently, learning discriminative features to improve recognition performance has gradually become a primary goal of deep learning, and numerous remarkable works have emerged.
In this paper, we propose a novel yet extremely simple method, Virtual Softmax, to enhance the discriminative property of learned features by injecting a dynamic virtual negative class into the original softmax. Injecting the virtual class aims to enlarge the inter-class margin and compress the intra-class distribution by strengthening the decision boundary constraint. Although it may seem counterintuitive to optimize with this additional virtual class, we show that our method derives from an intuitive and clear motivation, and it indeed encourages the features to be more compact and separable. This paper demonstrates the superiority of Virtual Softmax empirically, improving performance on a variety of object classification and face verification tasks.", "bibtex": "@inproceedings{NEURIPS2018_d79aac07,\n author = {Chen, Binghui and Deng, Weihong and Shen, Haifeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Virtual Class Enhanced Discriminative Embedding Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/d79aac075930c83c2f1e369a511148fe-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/d79aac075930c83c2f1e369a511148fe-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/d79aac075930c83c2f1e369a511148fe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/d79aac075930c83c2f1e369a511148fe-Reviews.html", "metareview": "", "pdf_size": 3124730, "gs_citation": 66, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13588083557854610485&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "aff": "Beijing University of Posts and Telecommunications; Beijing University of Posts and Telecommunications; AI Labs, Didi Chuxing, Beijing 100193, China", "aff_domain": "bupt.edu.cn;bupt.edu.cn;didiglobal.com", "email": "bupt.edu.cn;bupt.edu.cn;didiglobal.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/d79aac075930c83c2f1e369a511148fe-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Didi Chuxing", "aff_unique_dep": ";AI Labs", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.didi.com", "aff_unique_abbr": "BUPT;Didi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Visual Memory for Robust Path Following", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11099", "id": "11099", "author_site": "Ashish Kumar, Saurabh Gupta, David Fouhey, Sergey Levine, Jitendra Malik", "author": "Ashish Kumar; Saurabh Gupta; David Fouhey; Sergey Levine; Jitendra Malik", "abstract": "Humans routinely retrace a path in a novel environment both forwards and backwards despite uncertainty in their motion. In this paper, we present an approach for doing so. Given a demonstration of a path, a first network generates an abstraction of the path. Equipped with this abstraction, a second network then observes the world and decides how to act in order to retrace the path under noisy actuation and a changing environment. The two networks are optimized end-to-end at training time.
We evaluate the method in two realistic simulators, performing path following both forwards and backwards. Our experiments show that our approach outperforms both a classical approach to solving this task and a number of other baselines.", "bibtex": "@inproceedings{NEURIPS2018_66368270,\n author = {Kumar, Ashish and Gupta, Saurabh and Fouhey, David and Levine, Sergey and Malik, Jitendra},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visual Memory for Robust Path Following},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/66368270ffd51418ec58bd793f2d9b1b-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/66368270ffd51418ec58bd793f2d9b1b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/66368270ffd51418ec58bd793f2d9b1b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/66368270ffd51418ec58bd793f2d9b1b-Reviews.html", "metareview": "", "pdf_size": 1523610, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14780650498101569340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "email": "berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu", "github": "", "project": "https://ashishkumar1993.github.io/rpf/", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/66368270ffd51418ec58bd793f2d9b1b-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Visual Object Networks: Image Generation with Disentangled 3D Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11039", "id": "11039", "author_site": "Jun-Yan Zhu, Zhoutong Zhang, Chengkai Zhang, Jiajun Wu, Antonio Torralba, Josh Tenenbaum, Bill Freeman", "author": "Jun-Yan Zhu; Zhoutong Zhang; Chengkai Zhang; Jiajun Wu; Antonio Torralba; Josh Tenenbaum; Bill Freeman", "abstract": "Recent progress in deep generative models has led to tremendous breakthroughs in image generation. While being able to synthesize photorealistic images, existing models lack an understanding of our underlying 3D world. Different from previous works built on 2D datasets and models, we present a new generative model, Visual Object Networks (VONs), synthesizing natural images of objects with a disentangled 3D representation. Inspired by classic graphics rendering pipelines, we unravel the image formation process into three conditionally independent factors---shape, viewpoint, and texture---and present an end-to-end adversarial learning framework that jointly models 3D shape and 2D texture. Our model first learns to synthesize 3D shapes that are indistinguishable from real shapes.
It then renders the object's 2.5D sketches (i.e., silhouette and depth map) from its shape under a sampled viewpoint. Finally, it learns to add realistic textures to these 2.5D sketches to generate realistic images. The VON not only generates images that are more realistic than the state-of-the-art 2D image synthesis methods but also enables many 3D operations such as changing the viewpoint of a generated image, shape and texture editing, linear interpolation in texture and shape space, and transferring appearance across different objects and viewpoints.", "bibtex": "@inproceedings{NEURIPS2018_92cc2275,\n author = {Zhu, Jun-Yan and Zhang, Zhoutong and Zhang, Chengkai and Wu, Jiajun and Torralba, Antonio and Tenenbaum, Josh and Freeman, Bill},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visual Object Networks: Image Generation with Disentangled 3D Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/92cc227532d17e56e07902b254dfad10-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/92cc227532d17e56e07902b254dfad10-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/92cc227532d17e56e07902b254dfad10-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/92cc227532d17e56e07902b254dfad10-Reviews.html", "metareview": "", "pdf_size": 4941171, "gs_citation": 282, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3404291286977602499&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL+Google", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/92cc227532d17e56e07902b254dfad10-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0+1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;Google", "aff_unique_url": "https://www.csail.mit.edu;https://www.google.com", "aff_unique_abbr": "MIT CSAIL;Google", "aff_campus_unique_index": "0;0;0;0;0;0;0+1", "aff_campus_unique": "Cambridge;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Visual Reinforcement Learning with Imagined Goals", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11876", "id": "11876", "author_site": "Ashvin Nair, Vitchyr Pong, Murtaza Dalal, Shikhar Bahl, Steven Lin, Sergey Levine", "author": "Ashvin V Nair; Vitchyr Pong; Murtaza Dalal; Shikhar Bahl; Steven Lin; Sergey Levine", "abstract": "For an autonomous agent to fulfill a wide range of user-specified goals at test time, it must be able to learn broadly applicable and general-purpose skill repertoires. Furthermore, to provide the requisite level of generality, these skills must handle raw sensory input such as images. In this paper, we propose an algorithm that acquires such general-purpose skills by combining unsupervised representation learning and reinforcement learning of goal-conditioned policies. 
Since the particular goals that might be required at test-time are not known in advance, the agent performs a self-supervised \"practice\" phase where it imagines goals and attempts to achieve them. We learn a visual representation with three distinct purposes: sampling goals for self-supervised practice, providing a structured transformation of raw sensory inputs, and computing a reward signal for goal reaching. We also propose a retroactive goal relabeling scheme to further improve the sample-efficiency of our method. Our off-policy algorithm is efficient enough to learn policies that operate on raw image observations and goals in a real-world physical system, and substantially outperforms prior techniques.", "bibtex": "@inproceedings{NEURIPS2018_7ec69dd4,\n author = {Nair, Ashvin V and Pong, Vitchyr and Dalal, Murtaza and Bahl, Shikhar and Lin, Steven and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visual Reinforcement Learning with Imagined Goals},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/7ec69dd44416c46745f6edd947b470cd-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/7ec69dd44416c46745f6edd947b470cd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/7ec69dd44416c46745f6edd947b470cd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/7ec69dd44416c46745f6edd947b470cd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/7ec69dd44416c46745f6edd947b470cd-Reviews.html", "metareview": "", "pdf_size": 775760, "gs_citation": 665, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5007292417648560707&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley; University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/7ec69dd44416c46745f6edd947b470cd-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Visualizing the Loss Landscape of Neural Nets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11618", "id": "11618", "author_site": "Hao Li, Zheng Xu, Gavin Taylor, Christoph Studer, Tom Goldstein", "author": "Hao Li; Zheng Xu; Gavin Taylor; Christoph Studer; Tom Goldstein", "abstract": "Neural network training relies on our ability to find \"good\" minimizers of highly non-convex loss functions. 
It is well known that certain network architecture designs (e.g., skip connections) produce loss functions that train more easily, and well-chosen training parameters (batch size, learning rate, optimizer) produce minimizers that generalize better. However, the reasons for these differences, and their effect on the underlying loss landscape, are not well understood. In this paper, we explore the structure of neural loss functions, and the effect of loss landscapes on generalization, using a range of visualization methods. First, we introduce a simple \"filter normalization\" method that helps us visualize loss function curvature, and make meaningful side-by-side comparisons between loss functions. Then, using a variety of visualizations, we explore how network architecture affects the loss landscape, and how training parameters affect the shape of minimizers.", "bibtex": "@inproceedings{NEURIPS2018_a41b3bb3,\n author = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visualizing the Loss Landscape of Neural Nets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Reviews.html", "metareview": "", "pdf_size": 4638604, "gs_citation": 2509, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11650483902238288010&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "University of Maryland, College Park; University of Maryland, College Park; United States Naval Academy; Cornell University; University of Maryland, College Park", "aff_domain": "cs.umd.edu;cs.umd.edu;usna.edu;cornell.edu;cs.umd.edu", "email": "cs.umd.edu;cs.umd.edu;usna.edu;cornell.edu;cs.umd.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/a41b3bb3e6b050b6c9067c67f663b915-Abstract.html", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Maryland;United States Naval Academy;Cornell University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umd.edu;https://www.usna.edu;https://www.cornell.edu", "aff_unique_abbr": "UMD;USNA;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wasserstein Distributionally Robust Kalman Filtering", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11810", "id": "11810", "author_site": "Soroosh Shafieezadeh Abadeh, Viet Anh Nguyen, Daniel Kuhn, Peyman Mohajerin Esfahani", "author": "Soroosh Shafieezadeh-Abadeh; Viet Anh Nguyen; Daniel Kuhn; Peyman Mohajerin Esfahani", "abstract": "We study a distributionally robust mean square error estimation problem over a nonconvex Wasserstein ambiguity set containing only normal distributions.
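For normal distributions the type-2 Wasserstein distance has a well-known closed form, which is what makes an ambiguity set containing only Gaussians amenable to analysis. A minimal illustrative sketch of that formula (function names are ours, not code from the paper):

```python
import numpy as np
from scipy.linalg import sqrtm

def gaussian_w2_squared(m1, S1, m2, S2):
    """Squared 2-Wasserstein distance between N(m1, S1) and N(m2, S2)."""
    # Mean term plus the Bures distance between the covariance matrices.
    S2_half = sqrtm(S2)
    cross = sqrtm(S2_half @ S1 @ S2_half)  # may carry tiny imaginary parts numerically
    bures = np.trace(S1 + S2 - 2.0 * np.real(cross))
    return float(np.sum((m1 - m2) ** 2) + bures)

# Example: distance between two 2-d Gaussians.
print(gaussian_w2_squared(np.zeros(2), np.eye(2), np.ones(2), 2.0 * np.eye(2)))
```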
We show that the optimal estimator and the least favorable distribution form a Nash equilibrium. Despite the non-convex nature of the ambiguity set, we prove that the estimation problem is equivalent to a tractable convex program. We further devise a Frank-Wolfe algorithm for this convex program whose direction-searching subproblem can be solved in a quasi-closed form. Using these ingredients, we introduce a distributionally robust Kalman filter that hedges against model risk.", "bibtex": "@inproceedings{NEURIPS2018_15212f24,\n author = {Shafieezadeh Abadeh, Soroosh and Nguyen, Viet Anh and Kuhn, Daniel and Mohajerin Esfahani, Peyman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Wasserstein Distributionally Robust Kalman Filtering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/15212f24321aa2c3dc8e9acf820f3c15-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/15212f24321aa2c3dc8e9acf820f3c15-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/15212f24321aa2c3dc8e9acf820f3c15-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/15212f24321aa2c3dc8e9acf820f3c15-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/15212f24321aa2c3dc8e9acf820f3c15-Reviews.html", "metareview": "", "pdf_size": 1031155, "gs_citation": 135, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3916790984259735894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, CH-1015 Lausanne, Switzerland; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, CH-1015 Lausanne, Switzerland; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, CH-1015 Lausanne, Switzerland; Delft Center for Systems and Control, TU Delft, The Netherlands", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;tudelft.nl", "email": "epfl.ch;epfl.ch;epfl.ch;tudelft.nl", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/15212f24321aa2c3dc8e9acf820f3c15-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "EPFL;Delft University of Technology", "aff_unique_dep": ";Delft Center for Systems and Control", "aff_unique_url": "https://www.epfl.ch;https://www.tudelft.nl", "aff_unique_abbr": "EPFL;TU Delft", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Lausanne;Delft", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Switzerland;Netherlands" }, { "title": "Wasserstein Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11256", "id": "11256", "author_site": "Luca Ambrogioni, Umut G\u00fc\u00e7l\u00fc, Ya\u011fmur G\u00fc\u00e7l\u00fct\u00fcrk, Max Hinne, Marcel A. J. van Gerven, Eric Maris", "author": "Luca Ambrogioni; Umut G\u00fc\u00e7l\u00fc; Ya\u011fmur G\u00fc\u00e7l\u00fct\u00fcrk; Max Hinne; Marcel A. J. van Gerven; Eric Maris", "abstract": "This paper introduces Wasserstein variational inference, a new form of approximate Bayesian inference based on optimal transport theory. Wasserstein variational inference uses a new family of divergences that includes both f-divergences and the Wasserstein distance as special cases.
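As the abstract notes next, training gradients are obtained by backpropagating through Sinkhorn iterations; the iterations themselves are the standard entropy-regularized optimal-transport updates. A minimal sketch under our own naming and toy sizes (not the authors' code):

```python
import numpy as np

def sinkhorn_plan(a, b, C, eps=0.1, n_iters=200):
    """Entropy-regularized OT plan between histograms a and b with cost matrix C.

    Every step below is differentiable, which is what allows gradients to be
    backpropagated through the iterations in an autodiff framework.
    """
    K = np.exp(-C / eps)                 # Gibbs kernel
    u = np.ones_like(a)
    v = np.ones_like(b)
    for _ in range(n_iters):
        u = a / (K @ v)                  # scale rows toward marginal a
        v = b / (K.T @ u)                # scale columns toward marginal b
    return u[:, None] * K * v[None, :]   # plan = diag(u) K diag(v)

a = b = np.ones(4) / 4
C = (np.arange(4)[:, None] - np.arange(4)[None, :]) ** 2.0
P = sinkhorn_plan(a, b, C)
print(P.sum(axis=1), P.sum(axis=0))      # both approximately the marginals
```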
The gradients of the Wasserstein variational loss are obtained by backpropagating through the Sinkhorn iterations. This technique results in a very stable likelihood-free training method that can be used with implicit distributions and probabilistic programs. Using the Wasserstein variational inference framework, we introduce several new forms of autoencoders and test their robustness and performance against existing variational autoencoding techniques.", "bibtex": "@inproceedings{NEURIPS2018_2c89109d,\n author = {Ambrogioni, Luca and G\\\"{u}\\c{c}l\\\"{u}, Umut and G\\\"{u}\\c{c}l\\\"{u}t\\\"{u}rk, Ya\\u{g}mur and Hinne, Max and van Gerven, Marcel A. J. and Maris, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Wasserstein Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/2c89109d42178de8a367c0228f169bf8-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/2c89109d42178de8a367c0228f169bf8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/2c89109d42178de8a367c0228f169bf8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/2c89109d42178de8a367c0228f169bf8-Reviews.html", "metareview": "", "pdf_size": 1410369, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6451924518029579411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Radboud University; Radboud University; Radboud University; University of Amsterdam; Radboud University; Radboud University", "aff_domain": "donders.ru.nl;donders.ru.nl;donders.ru.nl;uva.nl;donders.ru.nl;donders.ru.nl", "email": "donders.ru.nl;donders.ru.nl;donders.ru.nl;uva.nl;donders.ru.nl;donders.ru.nl", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/2c89109d42178de8a367c0228f169bf8-Abstract.html", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Radboud University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.ru.nl;https://www.uva.nl", "aff_unique_abbr": "RU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Watch Your Step: Learning Node Embeddings via Graph Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11875", "id": "11875", "author_site": "Sami Abu-El-Haija, Bryan Perozzi, Rami Al-Rfou, Alexander Alemi", "author": "Sami Abu-El-Haija; Bryan Perozzi; Rami Al-Rfou; Alexander A Alemi", "abstract": "Graph embedding methods represent nodes in a continuous vector space,\npreserving different types of relational information from the graph.\nThere are many hyper-parameters to these methods (e.g. the length of a random walk) which have to be manually tuned for every graph.\nIn this paper, we replace previously fixed hyper-parameters with trainable ones that we automatically learn via backpropagation. \nIn particular, we propose a novel attention model on the power series of the transition matrix, which guides the random walk to optimize an upstream objective.\nUnlike previous approaches to attention models, the method that we propose utilizes attention parameters exclusively on the data itself (e.g. 
on the random walk), and they are not used by the model for inference.\nWe experiment on link prediction tasks, as we aim to produce embeddings that best preserve the graph structure, generalizing to unseen information. \nWe improve state-of-the-art results on a comprehensive suite of real-world graph datasets including social, collaboration, and biological networks, where we observe that our graph attention model can reduce the error by up to 20\%-40\%.\nWe show that our automatically-learned attention parameters can vary significantly per graph, and correspond to the optimal hyper-parameter choice obtained by manually tuning existing methods.", "bibtex": "@inproceedings{NEURIPS2018_8a94ecfa,\n author = {Abu-El-Haija, Sami and Perozzi, Bryan and Al-Rfou, Rami and Alemi, Alexander A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Watch Your Step: Learning Node Embeddings via Graph Attention},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/8a94ecfa54dcb88a2fa993bfa6388f9e-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/8a94ecfa54dcb88a2fa993bfa6388f9e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/8a94ecfa54dcb88a2fa993bfa6388f9e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/8a94ecfa54dcb88a2fa993bfa6388f9e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/8a94ecfa54dcb88a2fa993bfa6388f9e-Reviews.html", "metareview": "", "pdf_size": 468890, "gs_citation": 298, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1677540555393938447&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Information Sciences Institute, University of Southern California; Google AI, New York City, NY; Google AI, Mountain View, CA; Google AI, Mountain View, CA", "aff_domain": "isi.edu;acm.org;google.com;google.com", "email": "isi.edu;acm.org;google.com;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/8a94ecfa54dcb88a2fa993bfa6388f9e-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": "Information Sciences Institute;Google AI", "aff_unique_url": "https://www.usc.edu;https://ai.google", "aff_unique_abbr": "USC;Google AI", "aff_campus_unique_index": "0;1;2;2", "aff_campus_unique": "Los Angeles;New York City;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Wavelet regression and additive models for irregularly spaced data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11856", "id": "11856", "author_site": "Asad Haris, Ali Shojaie, Noah Simon", "author": "Asad Haris; Ali Shojaie; Noah Simon", "abstract": "We present a novel approach for nonparametric regression using wavelet basis functions. Our proposal, waveMesh, can be applied to non-equispaced data with sample size not necessarily a power of 2. We develop an efficient proximal gradient descent algorithm for computing the estimator and establish adaptive minimax convergence rates. The main appeal of our approach is that it naturally extends to additive and sparse additive models for a potentially large number of covariates.
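The proximal step for an l1 penalty on basis coefficients is coordinate-wise soft-thresholding, so a proximal gradient iteration for this kind of penalized least-squares estimator has a simple generic shape. A sketch under our own names (the actual waveMesh algorithm and penalty are specified in the paper):

```python
import numpy as np

def soft_threshold(z, t):
    """Prox of t * ||.||_1: shrink every coordinate toward zero by t."""
    return np.sign(z) * np.maximum(np.abs(z) - t, 0.0)

def proximal_gradient_l1(X, y, lam, step, n_iters=500):
    """Generic proximal gradient descent for 0.5 * ||y - X w||^2 + lam * ||w||_1.

    In a wavelet-regression setting the columns of X would be wavelet basis
    functions evaluated at the (possibly irregularly spaced) design points.
    """
    w = np.zeros(X.shape[1])
    for _ in range(n_iters):
        grad = X.T @ (X @ w - y)                         # gradient of the smooth part
        w = soft_threshold(w - step * grad, step * lam)  # proximal (shrinkage) step
    return w
```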
We prove minimax optimal convergence rates under a weak compatibility condition for sparse additive models. The compatibility condition holds when we have a small number of covariates. Additionally, we establish convergence rates for when the condition is not met. We complement our theoretical results with empirical studies comparing waveMesh to existing methods.", "bibtex": "@inproceedings{NEURIPS2018_bf764716,\n author = {Haris, Asad and Shojaie, Ali and Simon, Noah},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Wavelet regression and additive models for irregularly spaced data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/bf764716fe1a58cb07f8a377ec25c16d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/bf764716fe1a58cb07f8a377ec25c16d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/bf764716fe1a58cb07f8a377ec25c16d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/bf764716fe1a58cb07f8a377ec25c16d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/bf764716fe1a58cb07f8a377ec25c16d-Reviews.html", "metareview": "", "pdf_size": 315585, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13538347058695103986&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Biostatistics, University of Washington; Department of Biostatistics, University of Washington; Department of Biostatistics, University of Washington", "aff_domain": "uw.edu;uw.edu;uw.edu", "email": "uw.edu;uw.edu;uw.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/bf764716fe1a58cb07f8a377ec25c16d-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Biostatistics", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Weakly Supervised Dense Event Captioning in Videos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11311", "id": "11311", "author_site": "Xin Wang, Wenbing Huang, Chuang Gan, Jingdong Wang, Wenwu Zhu, Junzhou Huang", "author": "Xuguang Duan; Wenbing Huang; Chuang Gan; Jingdong Wang; Wenwu Zhu; Junzhou Huang", "abstract": "Dense event captioning aims to detect and describe all events of interest contained in a video. Despite the advanced development in this area, existing methods tackle this task by making use of dense temporal annotations, which is dramatically resource-consuming. This paper formulates a new problem: weakly supervised dense event captioning, which does not require temporal segment annotations for model training. Our solution is based on the one-to-one correspondence assumption: each caption describes one temporal segment, and each temporal segment has one caption. This assumption holds in current benchmark datasets and most real-world cases. We decompose the problem into a pair of dual problems, event captioning and sentence localization, and present a cycle system to train our model.
Extensive experimental results are provided to demonstrate the ability of our model on both dense event captioning and sentence localization in videos.", "bibtex": "@inproceedings{NEURIPS2018_49af6c4e,\n author = {Duan, Xuguang and Huang, Wenbing and Gan, Chuang and Wang, Jingdong and Zhu, Wenwu and Huang, Junzhou},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Weakly Supervised Dense Event Captioning in Videos},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/49af6c4e558a7569d80eee2e035e2bd7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/49af6c4e558a7569d80eee2e035e2bd7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/49af6c4e558a7569d80eee2e035e2bd7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/49af6c4e558a7569d80eee2e035e2bd7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/49af6c4e558a7569d80eee2e035e2bd7-Reviews.html", "metareview": "", "pdf_size": 1611770, "gs_citation": 188, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10574408099006648445&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Tsinghua University; Tencent AI Lab; MIT-IBM Watson AI Lab; Microsoft Research Asia; Tsinghua University; Tencent AI Lab", "aff_domain": "outlook.com;126.com;gmail.com;microsoft.com;tsinghua.edu.cn;tencent.com", "email": "outlook.com;126.com;gmail.com;microsoft.com;tsinghua.edu.cn;tencent.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/49af6c4e558a7569d80eee2e035e2bd7-Abstract.html", "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "Tsinghua University;Tencent;Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Tencent AI Lab;IBM Watson AI Lab;Research", "aff_unique_url": "https://www.tsinghua.edu.cn;https://ai.tencent.com;https://www.mitibmwatsonailab.org;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "THU;Tencent AI Lab;MIT-IBM AI Lab;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "title": "When do random forests fail?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11304", "id": "11304", "author_site": "Cheng Tang, Damien Garreau, Ulrike von Luxburg", "author": "Cheng Tang; Damien Garreau; Ulrike von Luxburg", "abstract": "Random forests are learning algorithms that build large collections of random trees and make predictions by averaging the individual tree predictions.\nIn this paper, we consider various tree constructions and examine how the choice of parameters affects the generalization error of the resulting random forests as the sample size goes to infinity. \nWe show that subsampling of data points during the tree construction phase is important: Forests can become inconsistent with either no subsampling or too severe subsampling. 
\nAs a consequence, even highly randomized trees can lead to inconsistent forests if no subsampling is used, which implies that some of the commonly used setups for random forests can be inconsistent.", "bibtex": "@inproceedings{NEURIPS2018_204da255,\n author = {Tang, Cheng and Garreau, Damien and von Luxburg, Ulrike},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {When do random forests fail?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/204da255aea2cd4a75ace6018fad6b4d-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/204da255aea2cd4a75ace6018fad6b4d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/204da255aea2cd4a75ace6018fad6b4d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/204da255aea2cd4a75ace6018fad6b4d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/204da255aea2cd4a75ace6018fad6b4d-Reviews.html", "metareview": "", "pdf_size": 295438, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11833545062049309174&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "George Washington University; Max Planck Institute for Intelligent Systems; University of T\u00fcbingen + Max Planck Institute for Intelligent Systems", "aff_domain": "gwu.edu;tuebingen.mpg.de;informatik.uni-tuebingen.de", "email": "gwu.edu;tuebingen.mpg.de;informatik.uni-tuebingen.de", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/204da255aea2cd4a75ace6018fad6b4d-Abstract.html", "aff_unique_index": "0;1;2+1", "aff_unique_norm": "George Washington University;Max Planck Institute for Intelligent Systems;University of T\u00fcbingen", "aff_unique_dep": ";Intelligent Systems;", "aff_unique_url": "https://www.gwu.edu;https://www.mpi-is.mpg.de;https://www.uni-tuebingen.de/", "aff_unique_abbr": "GWU;MPI-IS;Uni T\u00fcbingen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1+1", "aff_country_unique": "United States;Germany" }, { "title": "Where Do You Think You're Going?: Inferring Beliefs about Dynamics from Behavior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11161", "id": "11161", "author_site": "Sid Reddy, Anca Dragan, Sergey Levine", "author": "Sid Reddy; Anca Dragan; Sergey Levine", "abstract": "Inferring intent from observed behavior has been studied extensively within the frameworks of Bayesian inverse planning and inverse reinforcement learning. These methods infer a goal or reward function that best explains the actions of the observed agent, typically a human demonstrator. Another agent can use this inferred intent to predict, imitate, or assist the human user. However, a central assumption in inverse reinforcement learning is that the demonstrator is close to optimal. While models of suboptimal behavior exist, they typically assume that suboptimal actions are the result of some type of random noise or a known cognitive bias, like temporal inconsistency.
In this paper, we take an alternative approach, and model suboptimal behavior as the result of internal model misspecification: the reason that user actions might deviate from near-optimal actions is that the user has an incorrect set of beliefs about the rules -- the dynamics -- governing how actions affect the environment. Our insight is that while demonstrated actions may be suboptimal in the real world, they may actually be near-optimal with respect to the user's internal model of the dynamics. By estimating these internal beliefs from observed behavior, we arrive at a new method for inferring intent. We demonstrate in simulation and in a user study with 12 participants that this approach enables us to more accurately model human intent, and can be used in a variety of applications, including offering assistance in a shared autonomy framework and inferring human preferences.", "bibtex": "@inproceedings{NEURIPS2018_6f2268bd,\n author = {Reddy, Sid and Dragan, Anca and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Where Do You Think You\\textquotesingle re Going?: Inferring Beliefs about Dynamics from Behavior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/6f2268bd1d3d3ebaabb04d6b5d099425-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/6f2268bd1d3d3ebaabb04d6b5d099425-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/6f2268bd1d3d3ebaabb04d6b5d099425-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/6f2268bd1d3d3ebaabb04d6b5d099425-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/6f2268bd1d3d3ebaabb04d6b5d099425-Reviews.html", "metareview": "", "pdf_size": 2164966, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11438620297016616954&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley; Department of Electrical Engineering and Computer Science, University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "https://sites.google.com/view/inferring-internal-dynamics", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/6f2268bd1d3d3ebaabb04d6b5d099425-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Electrical Engineering and Computer Science", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Which Neural Net Architectures Give Rise to Exploding and Vanishing Gradients?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11081", "id": "11081", "author": "Boris Hanin", "abstract": "We give a rigorous analysis of the statistical behavior of gradients in a randomly initialized fully connected network N with ReLU activations. 
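The gradient statistics in question can be probed numerically: draw a random ReLU network, form the input-output Jacobian as a product of weight matrices and ReLU masks, and watch how its entries fluctuate as the widths change. A rough empirical sketch (our own construction and normalization, not the paper's code):

```python
import numpy as np

def jacobian_entry_sq(widths, rng):
    """One draw of a squared input-output Jacobian entry of a random ReLU net."""
    x = rng.standard_normal(widths[0])
    J = np.eye(widths[0])
    for n_in, n_out in zip(widths[:-1], widths[1:]):
        W = rng.standard_normal((n_out, n_in)) * np.sqrt(2.0 / n_in)  # He init
        pre = W @ x
        J = ((pre > 0).astype(float)[:, None] * W) @ J  # ReLU mask times weights
        x = np.maximum(pre, 0.0)
    return J[0, 0] ** 2

rng = np.random.default_rng(0)
for width in (10, 100):                       # same depth, different widths
    draws = [jacobian_entry_sq([width] * 15, rng) for _ in range(500)]
    print(width, np.var(draws))               # fluctuations shrink as widths grow
```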
Our results show that the empirical variance of the squares of the entries in the input-output Jacobian of N is exponential in a simple architecture-dependent constant beta, given by the sum of the reciprocals of the hidden layer widths. When beta is large, the gradients computed by N at initialization vary wildly. Our approach complements the mean field theory analysis of random networks. From this point of view, we rigorously compute finite width corrections to the statistics of gradients at the edge of chaos.", "bibtex": "@inproceedings{NEURIPS2018_13f9896d,\n author = {Hanin, Boris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Which Neural Net Architectures Give Rise to Exploding and Vanishing Gradients?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/13f9896df61279c928f19721878fac41-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/13f9896df61279c928f19721878fac41-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/13f9896df61279c928f19721878fac41-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/13f9896df61279c928f19721878fac41-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/13f9896df61279c928f19721878fac41-Reviews.html", "metareview": "", "pdf_size": 2245447, "gs_citation": 387, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3363403918200680804&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Mathematics, Texas A&M University", "aff_domain": "math.tamu.edu", "email": "math.tamu.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/13f9896df61279c928f19721878fac41-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "Department of Mathematics", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Why Is My Classifier Discriminatory?", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11355", "id": "11355", "author_site": "Irene Chen, Fredrik Johansson, David Sontag", "author": "Irene Chen; Fredrik D Johansson; David Sontag", "abstract": "Recent attempts to achieve fairness in predictive models focus on the balance between fairness and accuracy. In sensitive applications such as healthcare or criminal justice, this trade-off is often undesirable as any increase in prediction error could have devastating consequences. In this work, we argue that the fairness of predictions should be evaluated in the context of the data, and that unfairness induced by inadequate sample sizes or unmeasured predictive variables should be addressed through data collection, rather than by constraining the model. We decompose cost-based metrics of discrimination into bias, variance, and noise, and propose actions aimed at estimating and reducing each term. Finally, we perform case studies on the prediction of income, mortality, and review ratings, confirming the value of this analysis.
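For squared loss, the bias and variance pieces of such a decomposition can be estimated per group by retraining on bootstrap resamples; the noise term needs repeated outcomes at the same inputs and is omitted here, so the "bias" below also absorbs noise. A hedged sketch with made-up names, not the paper's estimator:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

def group_bias_variance(X, y, groups, n_boot=50, seed=0):
    """Rough per-group squared-bias and variance estimates via the bootstrap."""
    rng = np.random.default_rng(seed)
    preds = np.empty((n_boot, len(y)))
    for i in range(n_boot):
        idx = rng.integers(0, len(y), len(y))              # bootstrap resample
        preds[i] = LinearRegression().fit(X[idx], y[idx]).predict(X)
    mean_pred = preds.mean(axis=0)
    report = {}
    for g in np.unique(groups):
        m = groups == g
        report[g] = {
            "bias2_plus_noise": float(np.mean((mean_pred[m] - y[m]) ** 2)),
            "variance": float(np.mean(preds[:, m].var(axis=0))),
        }
    return report
```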
We find that data collection is often a means to reduce discrimination without sacrificing accuracy.", "bibtex": "@inproceedings{NEURIPS2018_1f1baa5b,\n author = {Chen, Irene and Johansson, Fredrik D and Sontag, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Why Is My Classifier Discriminatory?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/1f1baa5b8edac74eb4eaa329f14a0361-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/1f1baa5b8edac74eb4eaa329f14a0361-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/1f1baa5b8edac74eb4eaa329f14a0361-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/1f1baa5b8edac74eb4eaa329f14a0361-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/1f1baa5b8edac74eb4eaa329f14a0361-Reviews.html", "metareview": "", "pdf_size": 483004, "gs_citation": 573, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9129079794202950275&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "MIT; MIT; MIT", "aff_domain": "mit.edu;mit.edu;csail.mit.edu", "email": "mit.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/1f1baa5b8edac74eb4eaa329f14a0361-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Why so gloomy? A Bayesian explanation of human pessimism bias in the multi-armed bandit task", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11506", "id": "11506", "author_site": "Dalin Guo, Angela Yu", "author": "Dalin Guo; Angela J. Yu", "abstract": "How humans make repeated choices among options with imperfectly known reward outcomes is an important problem in psychology and neuroscience. This is often studied using multi-armed bandits, a setting also frequently studied in machine learning. We present data from a human stationary bandit experiment, in which we vary the average abundance and variability of reward availability (mean and variance of reward rate distributions). Surprisingly, we find that subjects significantly underestimate the prior mean of reward rates -- based on their self-report, at the end of a game, of their reward expectation for non-chosen arms. Previously, human learning in the bandit task was found to be well captured by a Bayesian ideal learning model, the Dynamic Belief Model (DBM), albeit under an incorrect generative assumption of the temporal structure: humans assume reward rates can change over time even though they are actually fixed. We find that the \"pessimism bias\" in the bandit task is well captured by the prior mean of DBM when fitted to human choices; but it is poorly captured by the prior mean of the Fixed Belief Model (FBM), an alternative Bayesian model that (correctly) assumes reward rates to be constant. This pessimism bias is also incompletely captured by a simple reinforcement learning model (RL) commonly used in neuroscience and psychology, in terms of fitted initial Q-values.
While it seems sub-optimal, and thus mysterious, that humans have an underestimated prior reward expectation, our simulations show that an underestimated prior mean helps to maximize long-term gain, if the observer assumes volatility when reward rates are stable and utilizes a softmax decision policy instead of the optimal one (obtainable by dynamic programming). This raises the intriguing possibility that the brain underestimates reward rates to compensate for the incorrect non-stationarity assumption in the generative model and a simplified decision policy.", "bibtex": "@inproceedings{NEURIPS2018_f55cadb9,\n author = {Guo, Dalin and Yu, Angela J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Why so gloomy? A Bayesian explanation of human pessimism bias in the multi-armed bandit task},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/f55cadb97eaff2ba1980e001b0bd9842-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/f55cadb97eaff2ba1980e001b0bd9842-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/f55cadb97eaff2ba1980e001b0bd9842-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/f55cadb97eaff2ba1980e001b0bd9842-Reviews.html", "metareview": "", "pdf_size": 1242679, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3744403517997128428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Cognitive Science, University of California San Diego, La Jolla, CA 92093; Department of Cognitive Science, University of California San Diego, La Jolla, CA 92093", "aff_domain": "ucsd.edu;ucsd.edu", "email": "ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/f55cadb97eaff2ba1980e001b0bd9842-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Department of Cognitive Science", "aff_unique_url": "https://ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "La Jolla", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "With Friends Like These, Who Needs Adversaries?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/12016", "id": "12016", "author_site": "Saumya Jetley, Nicholas Lord, Philip Torr", "author": "Saumya Jetley; Nicholas Lord; Philip Torr", "abstract": "The vulnerability of deep image classification networks to adversarial attack is now well known, but less well understood. Via a novel experimental analysis, we illustrate some facts about deep convolutional networks for image classification that shed new light on their behaviour and how it connects to the problem of adversaries. In short, the celebrated performance of these networks and their vulnerability to adversarial attack are simply two sides of the same coin: the input image-space directions along which the networks are most vulnerable to attack are the same directions which they use to achieve their classification performance in the first place. We develop this result in two main steps. The first uncovers the fact that classes tend to be associated with specific image-space directions. 
This is shown by an examination of the class-score outputs of nets as functions of 1D movements along these directions. This provides a novel perspective on the existence of universal adversarial perturbations. The second is a clear demonstration of the tight coupling between classification performance and vulnerability to adversarial attack within the spaces spanned by these directions. Thus, our analysis resolves the apparent contradiction between accuracy and vulnerability. It provides a new perspective on much of the prior art and reveals profound implications for efforts to construct neural nets that are both accurate and robust to adversarial attack.", "bibtex": "@inproceedings{NEURIPS2018_803a82de,\n author = {Jetley, Saumya and Lord, Nicholas and Torr, Philip},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {With Friends Like These, Who Needs Adversaries?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/803a82dee7e3fbb3438a149508484250-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/803a82dee7e3fbb3438a149508484250-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/803a82dee7e3fbb3438a149508484250-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/803a82dee7e3fbb3438a149508484250-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/803a82dee7e3fbb3438a149508484250-Reviews.html", "metareview": "", "pdf_size": 1204709, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5740676327222968631&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "aff": "Department of Engineering Science, University of Oxford + Oxford Research Group, FiveAI Ltd.; Department of Engineering Science, University of Oxford + Oxford Research Group, FiveAI Ltd.; Department of Engineering Science, University of Oxford + Oxford Research Group, FiveAI Ltd.", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk;robots.ox.ac.uk", "github": "https://github.com/torrvision/whoneedsadversaries", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/803a82dee7e3fbb3438a149508484250-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "University of Oxford;Oxford Research Group", "aff_unique_dep": "Department of Engineering Science;", "aff_unique_url": "https://www.ox.ac.uk;", "aff_unique_abbr": "Oxford;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "Zero-Shot Transfer with Deictic Object-Oriented Representation in Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11239", "id": "11239", "author_site": "Ofir Marom, Benjamin Rosman", "author": "Ofir Marom; Benjamin Rosman", "abstract": "Object-oriented representations in reinforcement learning have shown promise in transfer learning, with previous research introducing a propositional object-oriented framework that has provably efficient learning bounds with respect to sample complexity. However, this framework has limitations in terms of the classes of tasks it can efficiently learn. 
In this paper we introduce a novel deictic object-oriented framework that has provably efficient learning bounds and can solve a broader range of tasks. Additionally, we show that this framework is capable of zero-shot transfer of transition dynamics across tasks and demonstrate this empirically for the Taxi and Sokoban domains.", "bibtex": "@inproceedings{NEURIPS2018_df0aab05,\n author = {Marom, Ofir and Rosman, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Zero-Shot Transfer with Deictic Object-Oriented Representation in Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/df0aab058ce179e4f7ab135ed4e641a9-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/df0aab058ce179e4f7ab135ed4e641a9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/df0aab058ce179e4f7ab135ed4e641a9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/df0aab058ce179e4f7ab135ed4e641a9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/df0aab058ce179e4f7ab135ed4e641a9-Reviews.html", "metareview": "", "pdf_size": 385138, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6290380027959429087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "University of the Witwatersrand, Johannesburg, South Africa; University of the Witwatersrand, Johannesburg, South Africa + Council for Scientific and Industrial Research, Pretoria, South Africa", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/df0aab058ce179e4f7ab135ed4e641a9-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "University of the Witwatersrand;Council for Scientific and Industrial Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.wits.ac.za;https://www.csir.co.za", "aff_unique_abbr": "Wits;CSIR", "aff_campus_unique_index": "0;0+1", "aff_campus_unique": "Johannesburg;Pretoria", "aff_country_unique_index": "0;0+0", "aff_country_unique": "South Africa" }, { "title": "Zeroth-Order Stochastic Variance Reduction for Nonconvex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11372", "id": "11372", "author_site": "Sijia Liu, Bhavya Kailkhura, Pin-Yu Chen, Paishun Ting, Shiyu Chang, Lisa Amini", "author": "Sijia Liu; Bhavya Kailkhura; Pin-Yu Chen; Paishun Ting; Shiyu Chang; Lisa Amini", "abstract": "As application demands for zeroth-order (gradient-free) optimization accelerate, the need for variance-reduced and faster-converging approaches is also intensifying. This paper addresses these challenges by presenting: a) a comprehensive theoretical analysis of variance-reduced zeroth-order (ZO) optimization, b) a novel variance-reduced ZO algorithm, called ZO-SVRG, and c) an experimental evaluation of our approach in the context of two compelling applications, black-box chemical material classification and generation of adversarial examples from black-box deep neural network models. Our theoretical analysis uncovers an essential difficulty in the analysis of ZO-SVRG: the unbiased assumption on gradient estimates no longer holds.
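A two-point random gradient estimator of the kind analyzed here queries the function at two nearby points along a random direction; it is unbiased only for a smoothed surrogate of the objective, not for the objective itself, which is the wrinkle referred to above. A generic sketch (our notation, not the paper's code):

```python
import numpy as np

def zo_gradient(f, x, mu=1e-3, rng=None):
    """Two-point zeroth-order gradient estimate of f at x with smoothing radius mu."""
    rng = rng or np.random.default_rng()
    u = rng.standard_normal(x.shape)                # random search direction
    return (f(x + mu * u) - f(x - mu * u)) / (2.0 * mu) * u

f = lambda x: 0.5 * np.sum(x ** 2)                  # toy objective with gradient x
x = np.array([1.0, -2.0])
est = np.mean([zo_gradient(f, x) for _ in range(5000)], axis=0)
print(est)                                          # close to the true gradient [1, -2]
```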
We prove that compared to its first-order counterpart, ZO-SVRG with a two-point random gradient estimator could suffer an additional error of order $O(1/b)$, where $b$ is the mini-batch size. To mitigate this error, we propose two accelerated versions of ZO-SVRG utilizing variance-reduced gradient estimators, which achieve the best rate known for ZO stochastic optimization (in terms of iterations). Our extensive experimental results show that our approaches outperform other state-of-the-art ZO algorithms, and strike a balance between the convergence rate and the function query complexity.", "bibtex": "@inproceedings{NEURIPS2018_ba9a56ce,\n author = {Liu, Sijia and Kailkhura, Bhavya and Chen, Pin-Yu and Ting, Paishun and Chang, Shiyu and Amini, Lisa},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Zeroth-Order Stochastic Variance Reduction for Nonconvex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Reviews.html", "metareview": "", "pdf_size": 624031, "gs_citation": 216, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=509604933384263192&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "MIT-IBM Watson AI Lab, IBM Research; Lawrence Livermore National Laboratory; MIT-IBM Watson AI Lab, IBM Research; University of Michigan, Ann Arbor; MIT-IBM Watson AI Lab, IBM Research; MIT-IBM Watson AI Lab, IBM Research", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/ba9a56ce0a9bfa26e8ed9e10b2cc8f46-Abstract.html", "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "IBM;Lawrence Livermore National Laboratory;University of Michigan", "aff_unique_dep": "AI Lab;;", "aff_unique_url": "https://www.ibmwatsonai.org/;https://www.llnl.gov;https://www.umich.edu", "aff_unique_abbr": "MIT-IBM AI Lab;LLNL;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Zeroth-order (Non)-Convex Stochastic Optimization via Conditional Gradient and Gradient Updates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11347", "id": "11347", "author_site": "Krishnakumar Balasubramanian, Saeed Ghadimi", "author": "Krishnakumar Balasubramanian; Saeed Ghadimi", "abstract": "In this paper, we propose and analyze zeroth-order stochastic approximation algorithms for nonconvex and convex optimization. Specifically, we propose generalizations of the conditional gradient algorithm achieving rates similar to the standard stochastic gradient algorithm using only zeroth-order information.
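A conditional gradient (Frank-Wolfe) update needs only a linear minimization over the constraint set, so replacing the true gradient with a zeroth-order estimate such as the one sketched above is mechanical. A sketch of one step over an l1 ball, under assumed names and step sizes (the paper's algorithms and averaging schemes differ):

```python
import numpy as np

def zo_frank_wolfe_step(f, x, radius, t, mu=1e-3, rng=None):
    """One conditional-gradient step using only evaluations of f."""
    rng = rng or np.random.default_rng()
    u = rng.standard_normal(x.shape)
    g = (f(x + mu * u) - f(x - mu * u)) / (2.0 * mu) * u  # ZO gradient estimate
    # Linear minimization over the l1 ball is attained at a signed vertex.
    i = int(np.argmax(np.abs(g)))
    s = np.zeros_like(x)
    s[i] = -radius * np.sign(g[i])
    gamma = 2.0 / (t + 2.0)                               # classic FW step size
    return x + gamma * (s - x)
```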
Furthermore, under a structural sparsity assumption, we first illustrate an implicit regularization phenomenon where the standard stochastic gradient algorithm with zeroth-order information adapts to the sparsity of the problem at hand by just varying the step-size. Next, we propose a truncated stochastic gradient algorithm with zeroth-order information, whose rate of convergence depends only poly-logarithmically on the dimensionality.", "bibtex": "@inproceedings{NEURIPS2018_36d75342,\n author = {Balasubramanian, Krishnakumar and Ghadimi, Saeed},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Zeroth-order (Non)-Convex Stochastic Optimization via Conditional Gradient and Gradient Updates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/36d7534290610d9b7e9abed244dd2f28-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/36d7534290610d9b7e9abed244dd2f28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/36d7534290610d9b7e9abed244dd2f28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/36d7534290610d9b7e9abed244dd2f28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/36d7534290610d9b7e9abed244dd2f28-Reviews.html", "metareview": "", "pdf_size": 385434, "gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4323412423655105707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Statistics, University of California, Davis; Department of Operations Research and Financial Engineering, Princeton University", "aff_domain": "ucdavis.edu;princeton.edu", "email": "ucdavis.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/36d7534290610d9b7e9abed244dd2f28-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Davis;Princeton University", "aff_unique_dep": "Department of Statistics;Department of Operations Research and Financial Engineering", "aff_unique_url": "https://www.ucdavis.edu;https://www.princeton.edu", "aff_unique_abbr": "UC Davis;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Davis;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "cpSGD: Communication-efficient and differentially-private distributed SGD", "status": "Spotlight", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11727", "id": "11727", "author_site": "Naman Agarwal, Ananda Theertha Suresh, Felix Xinnan Yu, Sanjiv Kumar, Brendan McMahan", "author": "Naman Agarwal; Ananda Theertha Suresh; Felix Xinnan X Yu; Sanjiv Kumar; Brendan McMahan", "abstract": "Distributed stochastic gradient descent is an important subroutine in distributed learning. A setting of particular interest is when the clients are mobile devices, where two important concerns are communication efficiency and the privacy of the clients. Several recent works have focused on reducing the communication cost or introducing privacy guarantees, but none of the proposed communication efficient methods are known to be privacy preserving and none of the known privacy mechanisms are known to be communication efficient. 
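The Binomial mechanism analyzed below replaces continuous Gaussian noise with discrete noise that fits a quantized, few-bit representation. A generic sketch of the idea only (parameter values are placeholders, not the paper's calibrated choices):

```python
import numpy as np

def centered_binomial_noise(shape, m=64, rng=None):
    """Binomial(m, 1/2) noise shifted to mean zero; roughly N(0, m/4)."""
    rng = rng or np.random.default_rng()
    return rng.binomial(m, 0.5, size=shape) - m // 2

def noisy_quantized_vector(v, scale=256, m=64, rng=None):
    """Quantize a client's update to integers and add discrete noise.

    The noisy integers are what a client would transmit; choosing m and the
    quantization grid to meet a target (epsilon, delta) guarantee is the
    paper's analysis and is not reproduced here.
    """
    q = np.round(v * scale).astype(np.int64)       # fixed-point quantization
    return q + centered_binomial_noise(q.shape, m=m, rng=rng)
```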
To this end, we study algorithms that achieve both communication efficiency and differential privacy. For $d$ variables and $n \\approx d$ clients, the proposed method uses $O(\\log \\log(nd))$ bits of communication per client per coordinate and ensures constant privacy.\n\nWe also improve previous analysis of the \\emph{Binomial mechanism} showing that it achieves nearly the same utility as the Gaussian mechanism, while requiring fewer representation bits, which can be of independent interest.", "bibtex": "@inproceedings{NEURIPS2018_21ce6891,\n author = {Agarwal, Naman and Suresh, Ananda Theertha and Yu, Felix Xinnan X and Kumar, Sanjiv and McMahan, Brendan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {cpSGD: Communication-efficient and differentially-private distributed SGD},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/21ce689121e39821d07d04faab328370-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/21ce689121e39821d07d04faab328370-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/21ce689121e39821d07d04faab328370-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/21ce689121e39821d07d04faab328370-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/21ce689121e39821d07d04faab328370-Reviews.html", "metareview": "", "pdf_size": 1136867, "gs_citation": 587, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12309073014506443947&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Google Brain, Princeton, NJ 08540; Google Research, New York, NY; Google Research, New York, NY; Google Research, New York, NY; Google Research, Seattle, WA", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/21ce689121e39821d07d04faab328370-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;1;1;1;2", "aff_campus_unique": "Princeton;New York;Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "e-SNLI: Natural Language Inference with Natural Language Explanations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11907", "id": "11907", "author_site": "Oana-Maria Camburu, Tim Rockt\u00e4schel, Thomas Lukasiewicz, Phil Blunsom", "author": "Oana-Maria Camburu; Tim Rockt\u00e4schel; Thomas Lukasiewicz; Phil Blunsom", "abstract": "In order for machine learning to garner widespread public adoption, models must be able to provide interpretable and robust explanations for their decisions, as well as learn from human-provided explanations at train time. In this work, we extend the Stanford Natural Language Inference dataset with an additional layer of human-annotated natural language explanations of the entailment relations. We further implement models that incorporate these explanations into their training process and output them at test time.
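To make the Binomial mechanism mentioned in the cpSGD abstract concrete, the sketch below adds centered Binomial noise to an integer-quantized vector: Binomial(m, 1/2) noise approaches a Gaussian as m grows, yet lives on a discrete grid that is cheap to encode. The parameters `m`, `p`, and `scale` are illustrative assumptions, not the calibrated values from the paper, and the sketch omits the privacy accounting entirely.

```python
import numpy as np

def binomial_mechanism(v, m=128, p=0.5, scale=0.01, rng=None):
    """Quantize v to a grid of width `scale`, then add centered Binomial noise.

    The noisy output stays on the same discrete grid, so each coordinate can
    be transmitted with a small, fixed number of bits, which is the source of
    the communication savings relative to real-valued Gaussian noise.
    """
    rng = np.random.default_rng() if rng is None else rng
    q = np.round(v / scale).astype(np.int64)                # quantized update
    noise = rng.binomial(m, p, size=q.shape) - int(m * p)   # centered Binomial
    return (q + noise) * scale
```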
We show how our corpus of explanations, which we call e-SNLI, can be used for various goals, such as obtaining full sentence justifications of a model\u2019s decisions, improving universal sentence representations and transferring to out-of-domain NLI datasets. Our dataset thus opens up a range of research directions for using natural language explanations, both for improving models and for asserting their trust.", "bibtex": "@inproceedings{NEURIPS2018_4c7a167b,\n author = {Camburu, Oana-Maria and Rockt\\\"{a}schel, Tim and Lukasiewicz, Thomas and Blunsom, Phil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {e-SNLI: Natural Language Inference with Natural Language Explanations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/4c7a167bb329bd92580a99ce422d6fa6-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/4c7a167bb329bd92580a99ce422d6fa6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/4c7a167bb329bd92580a99ce422d6fa6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/4c7a167bb329bd92580a99ce422d6fa6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/4c7a167bb329bd92580a99ce422d6fa6-Reviews.html", "metareview": "", "pdf_size": 110966, "gs_citation": 709, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8864619741982773920&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, University of Oxford; Department of Computer Science, University College London; Department of Computer Science, University of Oxford + Alan Turing Institute, London, UK + DeepMind, London, UK; Department of Computer Science, University of Oxford + DeepMind, London, UK", "aff_domain": "cs.ox.ac.uk;ucl.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk", "email": "cs.ox.ac.uk;ucl.ac.uk;cs.ox.ac.uk;cs.ox.ac.uk", "github": "https://github.com/OanaMariaCamburu/e-SNLI", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/4c7a167bb329bd92580a99ce422d6fa6-Abstract.html", "aff_unique_index": "0;1;0+2+3;0+3", "aff_unique_norm": "University of Oxford;University College London;Alan Turing Institute;DeepMind", "aff_unique_dep": "Department of Computer Science;Department of Computer Science;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.ucl.ac.uk;https://www.turing.ac.uk;https://deepmind.com", "aff_unique_abbr": "Oxford;UCL;ATI;DeepMind", "aff_campus_unique_index": "0;1;0+1+1;0+1", "aff_campus_unique": "Oxford;London", "aff_country_unique_index": "0;0;0+0+0;0+0", "aff_country_unique": "United Kingdom" }, { "title": "rho-POMDPs have Lipschitz-Continuous epsilon-Optimal Value Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2018/poster/11668", "id": "11668", "author_site": "Mathieu Fehr, Olivier Buffet, Vincent Thomas, Jilles Dibangoye", "author": "Mathieu Fehr; Olivier Buffet; Vincent Thomas; Jilles Dibangoye", "abstract": "Many state-of-the-art algorithms for solving Partially Observable Markov Decision Processes (POMDPs) rely on turning the problem into a \u201cfully observable\u201d problem\u2014a belief MDP\u2014and exploiting the piece-wise linearity and convexity (PWLC) of the optimal value function in this new state space (the belief simplex \u2206).
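For concreteness, an e-SNLI-style record pairs SNLI's premise/hypothesis/label triple with a free-form human explanation. The field names below are illustrative placeholders; the released format is documented in the repository linked in the entry above.

```python
# Hypothetical shape of one e-SNLI-style example (field names are assumed).
example = {
    "premise": "A soccer game with multiple males playing.",
    "hypothesis": "Some men are playing a sport.",
    "label": "entailment",
    "explanation": "A soccer game is a sport, and the males playing are men.",
}
```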
This approach has been extended to solving \u03c1-POMDPs\u2014i.e., for information-oriented criteria\u2014when the reward \u03c1 is convex in \u2206. General \u03c1-POMDPs can also be turned into \u201cfully observable\u201d problems, but with no means to exploit the PWLC property. In this paper, we focus on POMDPs and \u03c1-POMDPs with $\\lambda_{\\rho}$-Lipschitz reward function, and demonstrate that, for finite horizons, the optimal value function is Lipschitz-continuous. Then, value function approximators are proposed for both upper- and lower-bounding the optimal value function, which are shown to provide uniformly improvable bounds. This allows us to propose two algorithms derived from HSVI, which are empirically evaluated on various benchmark problems.", "bibtex": "@inproceedings{NEURIPS2018_de7f47e0,\n author = {Fehr, Mathieu and Buffet, Olivier and Thomas, Vincent and Dibangoye, Jilles},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {rho-POMDPs have Lipschitz-Continuous epsilon-Optimal Value Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/de7f47e09c8e05e6021ababdf6bc58e7-Paper.pdf},\n volume = {31},\n year = {2018}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2018/file/de7f47e09c8e05e6021ababdf6bc58e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2018/file/de7f47e09c8e05e6021ababdf6bc58e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2018/file/de7f47e09c8e05e6021ababdf6bc58e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2018/file/de7f47e09c8e05e6021ababdf6bc58e7-Reviews.html", "metareview": "", "pdf_size": 416833, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14939684687474527322&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "\u00c9cole Normale Sup\u00e9rieure de la rue d\u2019Ulm, Paris, France; Universit\u00e9 de Lorraine, CNRS, Inria, LORIA, Nancy, France; Universit\u00e9 de Lorraine, CNRS, Inria, LORIA, Nancy, France; Universit\u00e9 de Lyon, INSA Lyon, Inria, CITI, Lyon, France", "aff_domain": "ens.fr;loria.fr;loria.fr;inria.fr", "email": "ens.fr;loria.fr;loria.fr;inria.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2018/hash/de7f47e09c8e05e6021ababdf6bc58e7-Abstract.html", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "\u00c9cole Normale Sup\u00e9rieure;Universit\u00e9 de Lorraine;Universit\u00e9 de Lyon", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ens.fr;https://www.univ-lorraine.fr;https://www.universitedelyon.fr", "aff_unique_abbr": "ENS;UL;UDL", "aff_campus_unique_index": "0;1;1;2", "aff_campus_unique": "Paris;Nancy;Lyon", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" } ]
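The Lipschitz value-function approximators in the last abstract follow a generic recipe: a lam-Lipschitz function sampled at a few beliefs is bracketed by cone-shaped envelopes around those samples. Below is a minimal sketch of that bracketing under assumed names and a Euclidean norm; the paper's actual approximators are additionally uniformly improvable, which this sketch does not capture.

```python
import numpy as np

def lipschitz_bounds(b, points, values, lam):
    """Bracket any lam-Lipschitz function V at belief b, given samples V(b_i).

    Lipschitz continuity gives, for every sample i,
        values[i] - lam*||b - b_i||  <=  V(b)  <=  values[i] + lam*||b - b_i||,
    so the tightest bounds take the max of the lower cones and the min of
    the upper cones.
    """
    dists = np.linalg.norm(points - b, axis=1)
    lower = np.max(values - lam * dists)
    upper = np.min(values + lam * dists)
    return lower, upper

# Usage: bound V(b) from two sampled beliefs of a 1-Lipschitz function.
pts = np.array([[0.2, 0.8], [0.6, 0.4]])
vals = np.array([1.0, 1.3])
print(lipschitz_bounds(np.array([0.4, 0.6]), pts, vals, lam=1.0))
```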