[ { "title": "A Bandit Framework for Strategic Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7007", "id": "7007", "author_site": "Yang Liu, Yiling Chen", "author": "Yang Liu; Yiling Chen", "abstract": "We consider a learner's problem of acquiring data dynamically for training a regression model, where the training data are collected from strategic data sources. A fundamental challenge is to incentivize data holders to exert effort to improve the quality of their reported data, despite that the quality is not directly verifiable by the learner. In this work, we study a dynamic data acquisition process where data holders can contribute multiple times. Using a bandit framework, we leverage on the long-term incentive of future job opportunities to incentivize high-quality contributions. We propose a Strategic Regression-Upper Confidence Bound (SR-UCB) framework, an UCB-style index combined with a simple payment rule, where the index of a worker approximates the quality of his past contributions and is used by the learner to determine whether the worker receives future work. For linear regression and certain family of non-linear regression problems, we show that SR-UCB enables a $O(\\sqrt{\\log T/T})$-Bayesian Nash Equilibrium (BNE) where each worker exerting a target effort level that the learner has chosen, with $T$ being the number of data acquisition stages. The SR-UCB framework also has some other desirable properties: (1) The indexes can be updated in an online fashion (hence computationally light). (2) A slight variant, namely Private SR-UCB (PSR-UCB), is able to preserve $(O(\\log^{-1} T), O(\\log^{-1} T))$-differential privacy for workers' data, with only a small compromise on incentives (achieving $O(\\log^{6} T/\\sqrt{T})$-BNE).", "bibtex": "@inproceedings{NIPS2016_d79aac07,\n author = {Liu, Yang and Chen, Yiling},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bandit Framework for Strategic Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d79aac075930c83c2f1e369a511148fe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d79aac075930c83c2f1e369a511148fe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d79aac075930c83c2f1e369a511148fe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d79aac075930c83c2f1e369a511148fe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d79aac075930c83c2f1e369a511148fe-Reviews.html", "metareview": "", "pdf_size": 184846, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16446178955735246211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "School of Engineering and Applied Science, Harvard University; School of Engineering and Applied Science, Harvard University", "aff_domain": "seas.harvard.edu;seas.harvard.edu", "email": "seas.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d79aac075930c83c2f1e369a511148fe-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "School of Engineering and Applied Science", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Bayesian method for reducing bias in neural representational similarity analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6896", "id": "6896", "author_site": "Mingbo Cai, Nicolas W Schuck, Jonathan Pillow, Yael Niv", "author": "Mingbo Cai; Nicolas W Schuck; Jonathan W Pillow; Yael Niv", "abstract": "In neuroscience, the similarity matrix of neural activity patterns in response to different sensory stimuli or under different cognitive states reflects the structure of neural representational space. Existing methods derive point estimations of neural activity patterns from noisy neural imaging data, and the similarity is calculated from these point estimations. We show that this approach translates structured noise from estimated patterns into spurious bias structure in the resulting similarity matrix, which is especially severe when signal-to-noise ratio is low and experimental conditions cannot be fully randomized in a cognitive task. We propose an alternative Bayesian framework for computing representational similarity in which we treat the covariance structure of neural activity patterns as a hyper-parameter in a generative model of the neural data, and directly estimate this covariance structure from imaging data while marginalizing over the unknown activity patterns. Converting the estimated covariance structure into a correlation matrix offers a much less biased estimate of neural representational similarity. Our method can also simultaneously estimate a signal-to-noise map that informs where the learned representational structure is supported more strongly, and the learned covariance matrix can be used as a structured prior to constrain Bayesian estimation of neural activity patterns. 
Our code is freely available in Brain Imaging Analysis Kit (Brainiak) (https://github.com/IntelPNI/brainiak), a python toolkit for brain imaging analysis.", "bibtex": "@inproceedings{NIPS2016_b06f50d1,\n author = {Cai, Mingbo and Schuck, Nicolas W and Pillow, Jonathan W and Niv, Yael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bayesian method for reducing bias in neural representational similarity analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b06f50d1f89bd8b2a0fb771c1a69c2b0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b06f50d1f89bd8b2a0fb771c1a69c2b0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b06f50d1f89bd8b2a0fb771c1a69c2b0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b06f50d1f89bd8b2a0fb771c1a69c2b0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b06f50d1f89bd8b2a0fb771c1a69c2b0-Reviews.html", "metareview": "", "pdf_size": 2024196, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4614101203950408643&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "aff": "Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544; Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544; Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544; Princeton Neuroscience Institute, Princeton University, Princeton, NJ 08544", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu;princeton.edu", "github": "https://github.com/IntelPNI/brainiak", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b06f50d1f89bd8b2a0fb771c1a69c2b0-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Princeton Neuroscience Institute", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Princeton", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Bio-inspired Redundant Sensing Architecture", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8506", "id": "8506", "author_site": "Anh Tuan Nguyen, Jian Xu, Zhi Yang", "author": "Anh Tuan Nguyen; Jian Xu; Zhi Yang", "abstract": "Sensing is the process of deriving signals from the environment that allows artificial systems to interact with the physical world. The Shannon theorem specifies the maximum rate at which information can be acquired. However, this upper bound is hard to achieve in many man-made systems. The biological visual systems, on the other hand, have highly efficient signal representation and processing mechanisms that allow precise sensing. In this work, we argue that redundancy is one of the critical characteristics for such superior performance. We show architectural advantages by utilizing redundant sensing, including correction of mismatch error and significant precision enhancement. For a proof-of-concept demonstration, we have designed a heuristic-based analog-to-digital converter - a zero-dimensional quantizer. 
Through Monte Carlo simulation with the error probabilistic distribution as a priori, the performance approaching the Shannon limit is feasible. In actual measurements without knowing the error distribution, we observe at least 2-bit extra precision. The results may also help explain biological processes including the dominance of binocular vision, the functional roles of the fixational eye movements, and the structural mechanisms allowing hyperacuity.", "bibtex": "@inproceedings{NIPS2016_a9078e86,\n author = {Nguyen, Anh Tuan and Xu, Jian and Yang, Zhi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Bio-inspired Redundant Sensing Architecture},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a9078e8653368c9c291ae2f8b74012e7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a9078e8653368c9c291ae2f8b74012e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a9078e8653368c9c291ae2f8b74012e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a9078e8653368c9c291ae2f8b74012e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a9078e8653368c9c291ae2f8b74012e7-Reviews.html", "metareview": "", "pdf_size": 1116725, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12690195227598997948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Biomedical Engineering; Department of Biomedical Engineering; Department of Biomedical Engineering", "aff_domain": "umn.edu;umn.edu;umn.edu", "email": "umn.edu;umn.edu;umn.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a9078e8653368c9c291ae2f8b74012e7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Department of Biomedical Engineering", "aff_unique_dep": "Biomedical Engineering", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "A Communication-Efficient Parallel Algorithm for Decision Tree", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7252", "id": "7252", "author_site": "Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu", "author": "Qi Meng; Guolin Ke; Taifeng Wang; Wei Chen; Qiwei Ye; Zhi-Ming Ma; Tie-Yan Liu", "abstract": "Decision tree (and its extensions such as Gradient Boosting Decision Trees and Random Forest) is a widely used machine learning algorithm, due to its practical effectiveness and model interpretability. With the emergence of big data, there is an increasing need to parallelize the training process of decision tree. However, most existing attempts along this line suffer from high communication costs. In this paper, we propose a new algorithm, called \\emph{Parallel Voting Decision Tree (PV-Tree)}, to tackle this challenge. After partitioning the training data onto a number of (e.g., $M$) machines, this algorithm performs both local voting and global voting in each iteration. For local voting, the top-$k$ attributes are selected from each machine according to its local data. 
Then, the indices of these top attributes are aggregated by a server, and the globally top-$2k$ attributes are determined by a majority voting among these local candidates. Finally, the full-grained histograms of the globally top-$2k$ attributes are collected from local machines in order to identify the best (most informative) attribute and its split point. PV-Tree can achieve a very low communication cost (independent of the total number of attributes) and thus can scale out very well. Furthermore, theoretical analysis shows that this algorithm can learn a near optimal decision tree, since it can find the best attribute with a large probability. Our experiments on real-world datasets show that PV-Tree significantly outperforms the existing parallel decision tree algorithms in the tradeoff between accuracy and efficiency.", "bibtex": "@inproceedings{NIPS2016_10a5ab2d,\n author = {Meng, Qi and Ke, Guolin and Wang, Taifeng and Chen, Wei and Ye, Qiwei and Ma, Zhi-Ming and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Communication-Efficient Parallel Algorithm for Decision Tree},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/10a5ab2db37feedfdeaab192ead4ac0e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/10a5ab2db37feedfdeaab192ead4ac0e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/10a5ab2db37feedfdeaab192ead4ac0e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/10a5ab2db37feedfdeaab192ead4ac0e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/10a5ab2db37feedfdeaab192ead4ac0e-Reviews.html", "metareview": "", "pdf_size": 468735, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1443065668113448271&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Peking University; Microsoft Research; Microsoft Research; Microsoft Research; Microsoft Research; Chinese Academy of Mathematics and Systems Science; Microsoft Research", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;amt.ac.cn;microsoft.com", "email": "pku.edu.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;amt.ac.cn;microsoft.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/10a5ab2db37feedfdeaab192ead4ac0e-Abstract.html", "aff_unique_index": "0;1;1;1;1;2;1", "aff_unique_norm": "Peking University;Microsoft;Chinese Academy of Mathematics and Systems Science", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research;http://www.amss.ac.cn", "aff_unique_abbr": "Peking U;MSR;AMSS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;1", "aff_country_unique": "China;United States" }, { "title": "A Comprehensive Linear Speedup Analysis for Asynchronous Stochastic Parallel Optimization from Zeroth-Order to First-Order", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6993", "id": "6993", "author_site": "Xiangru Lian, Huan Zhang, Cho-Jui Hsieh, Yijun Huang, Ji Liu", "author": "Xiangru Lian; Huan Zhang; Cho-Jui Hsieh; Yijun Huang; Ji Liu", "abstract": "Asynchronous parallel optimization received substantial successes 
and extensive attention recently. One of core theoretical questions is how much speedup (or benefit) the asynchronous parallelization can bring to us. This paper provides a comprehensive and generic analysis to study the speedup property for a broad range of asynchronous parallel stochastic algorithms from the zeroth order to the first order methods. Our result recovers or improves existing analysis on special cases, provides more insights for understanding the asynchronous parallel behaviors, and suggests a novel asynchronous parallel zeroth order method for the first time. Our experiments provide novel applications of the proposed asynchronous parallel zeroth order method on hyper parameter tuning and model blending problems.", "bibtex": "@inproceedings{NIPS2016_db191505,\n author = {Lian, Xiangru and Zhang, Huan and Hsieh, Cho-Jui and Huang, Yijun and Liu, Ji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Comprehensive Linear Speedup Analysis for Asynchronous Stochastic Parallel Optimization from Zeroth-Order to First-Order},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/db1915052d15f7815c8b88e879465a1e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/db1915052d15f7815c8b88e879465a1e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/db1915052d15f7815c8b88e879465a1e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/db1915052d15f7815c8b88e879465a1e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/db1915052d15f7815c8b88e879465a1e-Reviews.html", "metareview": "", "pdf_size": 166198, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11767054837953548628&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, University of Rochester, USA; Department of Electrical and Computer Engineering, University of California, Davis, USA; Department of Computer Science, University of California, Davis, USA; Department of Computer Science, University of Rochester, USA; Department of Computer Science, University of Rochester, USA", "aff_domain": "yandex.com;gmail.com;ucdavis.edu;gmail.com;gmail.com", "email": "yandex.com;gmail.com;ucdavis.edu;gmail.com;gmail.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/db1915052d15f7815c8b88e879465a1e-Abstract.html", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of Rochester;University of California, Davis", "aff_unique_dep": "Department of Computer Science;Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.rochester.edu;https://www.ucdavis.edu", "aff_unique_abbr": "U of Rochester;UC Davis", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Davis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Consistent Regularization Approach for Structured Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7356", "id": "7356", "author_site": "Carlo Ciliberto, Lorenzo Rosasco, Alessandro Rudi", "author": "Carlo Ciliberto; Lorenzo Rosasco; Alessandro Rudi", "abstract": "We propose and analyze a regularization approach for structured prediction problems. 
We characterize a large class of loss functions that allows to naturally embed structured outputs in a linear space. We exploit this fact to design learning algorithms using a surrogate loss approach and regularization techniques. We prove universal consistency and finite sample bounds characterizing the generalization properties of the proposed method. Experimental results are provided to demonstrate the practical usefulness of the proposed approach.", "bibtex": "@inproceedings{NIPS2016_88a839f2,\n author = {Ciliberto, Carlo and Rosasco, Lorenzo and Rudi, Alessandro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Consistent Regularization Approach for Structured Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/88a839f2f6f1427879fc33ee4acf4f66-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/88a839f2f6f1427879fc33ee4acf4f66-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/88a839f2f6f1427879fc33ee4acf4f66-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/88a839f2f6f1427879fc33ee4acf4f66-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/88a839f2f6f1427879fc33ee4acf4f66-Reviews.html", "metareview": "", "pdf_size": 376608, "gs_citation": 93, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5442801382432637810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Laboratory for Computational and Statistical Learning - Istituto Italiano di Tecnologia, Genova, Italy & Massachusetts Institute of Technology, Cambridge, MA 02139, USA; Laboratory for Computational and Statistical Learning - Istituto Italiano di Tecnologia, Genova, Italy & Massachusetts Institute of Technology, Cambridge, MA 02139, USA + Universit\u00e0 degli Studi di Genova, Genova, Italy; Laboratory for Computational and Statistical Learning - Istituto Italiano di Tecnologia, Genova, Italy & Massachusetts Institute of Technology, Cambridge, MA 02139, USA + Universit\u00e0 degli Studi di Genova, Genova, Italy", "aff_domain": "mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/88a839f2f6f1427879fc33ee4acf4f66-Abstract.html", "aff_unique_index": "0;0+1;0+1", "aff_unique_norm": "Istituto Italiano di Tecnologia;Universit\u00e0 degli Studi di Genova", "aff_unique_dep": "Laboratory for Computational and Statistical Learning;", "aff_unique_url": "https://www.iit.it;https://www.unige.it", "aff_unique_abbr": "IIT;UniGe", "aff_campus_unique_index": "0;0+0;0+0", "aff_campus_unique": "Genova", "aff_country_unique_index": "0;0+0;0+0", "aff_country_unique": "Italy" }, { "title": "A Constant-Factor Bi-Criteria Approximation Guarantee for k-means++", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7057", "id": "7057", "author": "Dennis Wei", "abstract": "This paper studies the $k$-means++ algorithm for clustering as well as the class of $D^\\ell$ sampling algorithms to which $k$-means++ belongs. It is shown that for any constant factor $\\beta > 1$, selecting $\\beta k$ cluster centers by $D^\\ell$ sampling yields a constant-factor approximation to the optimal clustering with $k$ centers, in expectation and without conditions on the dataset. 
This result extends the previously known $O(\\log k)$ guarantee for the case $\\beta = 1$ to the constant-factor bi-criteria regime. It also improves upon an existing constant-factor bi-criteria result that holds only with constant probability.", "bibtex": "@inproceedings{NIPS2016_357a6fdf,\n author = {Wei, Dennis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Constant-Factor Bi-Criteria Approximation Guarantee for k-means++},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/357a6fdf7642bf815a88822c447d9dc4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/357a6fdf7642bf815a88822c447d9dc4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/357a6fdf7642bf815a88822c447d9dc4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/357a6fdf7642bf815a88822c447d9dc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/357a6fdf7642bf815a88822c447d9dc4-Reviews.html", "metareview": "", "pdf_size": 287316, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12339857656614994095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "IBM Research", "aff_domain": "us.ibm.com", "email": "us.ibm.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/357a6fdf7642bf815a88822c447d9dc4-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "A Credit Assignment Compiler for Joint Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7184", "id": "7184", "author_site": "Kai-Wei Chang, He He, Stephane Ross, Hal Daum\u00e9 III, John Langford", "author": "Kai-Wei Chang; He He; Stephane Ross; Hal Daume III; John Langford", "abstract": "Many machine learning applications involve jointly predicting multiple mutually dependent output variables. Learning to search is a family of methods where the complex decision problem is cast into a sequence of decisions via a search space. Although these methods have shown promise both in theory and in practice, implementing them has been burdensomely awkward. In this paper, we show the search space can be defined by an arbitrary imperative program, turning learning to search into a credit assignment compiler. Altogether with the algorithmic improvements for the compiler, we radically reduce the complexity of programming and the running time. We demonstrate the feasibility of our approach on multiple joint prediction tasks. In all cases, we obtain accuracies as high as alternative approaches, at drastically reduced execution and programming time.", "bibtex": "@inproceedings{NIPS2016_8d6dc35e,\n author = {Chang, Kai-Wei and He, He and Ross, Stephane and Daume III, Hal and Langford, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Credit Assignment Compiler for Joint Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8d6dc35e506fc23349dd10ee68dabb64-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8d6dc35e506fc23349dd10ee68dabb64-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8d6dc35e506fc23349dd10ee68dabb64-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8d6dc35e506fc23349dd10ee68dabb64-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8d6dc35e506fc23349dd10ee68dabb64-Reviews.html", "metareview": "", "pdf_size": 573678, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=462617891301565956&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Virginia; University of Maryland; University of Maryland; Microsoft Research; Google", "aff_domain": "kwchang.net;cs.umd.edu;hal3.name;microsoft.com;google.com", "email": "kwchang.net;cs.umd.edu;hal3.name;microsoft.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8d6dc35e506fc23349dd10ee68dabb64-Abstract.html", "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "University of Virginia;University of Maryland;Microsoft;Google", "aff_unique_dep": ";;Microsoft Research;Google", "aff_unique_url": "https://www.virginia.edu;https://www/umd.edu;https://www.microsoft.com/en-us/research;https://www.google.com", "aff_unique_abbr": "UVA;UMD;MSR;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "A Locally Adaptive Normal Distribution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7066", "id": "7066", "author_site": "Georgios Arvanitidis, Lars K Hansen, S\u00f8ren Hauberg", "author": "Georgios Arvanitidis; Lars K. Hansen; S\u00f8ren Hauberg", "abstract": "The multivariate normal density is a monotonic function of the distance to the mean, and its ellipsoidal shape is due to the underlying Euclidean metric. We suggest to replace this metric with a locally adaptive, smoothly changing (Riemannian) metric that favors regions of high local density. The resulting locally adaptive normal distribution (LAND) is a generalization of the normal distribution to the \"manifold\" setting, where data is assumed to lie near a potentially low-dimensional manifold embedded in R^D. The LAND is parametric, depending only on a mean and a covariance, and is the maximum entropy distribution under the given metric. The underlying metric is, however, non-parametric. We develop a maximum likelihood algorithm to infer the distribution parameters that relies on a combination of gradient descent and Monte Carlo integration. We further extend the LAND to mixture models, and provide the corresponding EM algorithm. We demonstrate the efficiency of the LAND to fit non-trivial probability distributions over both synthetic data, and EEG measurements of human sleep.", "bibtex": "@inproceedings{NIPS2016_01931a69,\n author = {Arvanitidis, Georgios and Hansen, Lars K and Hauberg, S\\o ren},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Locally Adaptive Normal Distribution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/01931a6925d3de09e5f87419d9d55055-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/01931a6925d3de09e5f87419d9d55055-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/01931a6925d3de09e5f87419d9d55055-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/01931a6925d3de09e5f87419d9d55055-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/01931a6925d3de09e5f87419d9d55055-Reviews.html", "metareview": "", "pdf_size": 3127343, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3939473033236749118&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Technical University of Denmark, Lyngby, Denmark + DTU Compute, Section for Cognitive Systems; Technical University of Denmark, Lyngby, Denmark + DTU Compute, Section for Cognitive Systems; Technical University of Denmark, Lyngby, Denmark + DTU Compute, Section for Cognitive Systems", "aff_domain": "dtu.dk;dtu.dk;dtu.dk", "email": "dtu.dk;dtu.dk;dtu.dk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/01931a6925d3de09e5f87419d9d55055-Abstract.html", "aff_unique_index": "0+0;0+0;0+0", "aff_unique_norm": "Technical University of Denmark", "aff_unique_dep": "", "aff_unique_url": "https://www.tek.dk", "aff_unique_abbr": "DTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lyngby;", "aff_country_unique_index": "0+0;0+0;0+0", "aff_country_unique": "Denmark" }, { "title": "A Minimax Approach to Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7173", "id": "7173", "author_site": "Farzan Farnia, David Tse", "author": "Farzan Farnia; David Tse", "abstract": "Given a task of predicting Y from X, a loss function L, and a set of probability distributions Gamma on (X,Y), what is the optimal decision rule minimizing the worst-case expected loss over Gamma? In this paper, we address this question by introducing a generalization of the maximum entropy principle. Applying this principle to sets of distributions with marginal on X constrained to be the empirical marginal, we provide a minimax interpretation of the maximum likelihood problem over generalized linear models as well as some popular regularization schemes. For quadratic and logarithmic loss functions we revisit well-known linear and logistic regression models. Moreover, for the 0-1 loss we derive a classifier which we call the minimax SVM. The minimax SVM minimizes the worst-case expected 0-1 loss over the proposed Gamma by solving a tractable optimization problem. We perform several numerical experiments to show the power of the minimax SVM in outperforming the SVM.", "bibtex": "@inproceedings{NIPS2016_7b1ce3d7,\n author = {Farnia, Farzan and Tse, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Minimax Approach to Supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7b1ce3d73b70f1a7246e7b76a35fb552-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7b1ce3d73b70f1a7246e7b76a35fb552-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7b1ce3d73b70f1a7246e7b76a35fb552-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7b1ce3d73b70f1a7246e7b76a35fb552-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7b1ce3d73b70f1a7246e7b76a35fb552-Reviews.html", "metareview": "", "pdf_size": 746803, "gs_citation": 148, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7446796069484087280&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Electrical Engineering, Stanford University; Department of Electrical Engineering, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7b1ce3d73b70f1a7246e7b76a35fb552-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Multi-Batch L-BFGS Method for Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8504", "id": "8504", "author_site": "Albert Berahas, Jorge Nocedal, Martin Takac", "author": "Albert S Berahas; Jorge Nocedal; Martin Takac", "abstract": "The question of how to parallelize the stochastic gradient descent (SGD) method has received much attention in the literature. In this paper, we focus instead on batch methods that use a sizeable fraction of the training set at each iteration to facilitate parallelism, and that employ second-order information. In order to improve the learning process, we follow a multi-batch approach in which the batch changes at each iteration. This can cause difficulties because L-BFGS employs gradient differences to update the Hessian approximations, and when these gradients are computed using different data points the process can be unstable. This paper shows how to perform stable quasi-Newton updating in the multi-batch setting, illustrates the behavior of the algorithm in a distributed computing platform, and studies its convergence properties for both the convex and nonconvex cases.", "bibtex": "@inproceedings{NIPS2016_8ebda540,\n author = {Berahas, Albert S and Nocedal, Jorge and Takac, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Multi-Batch L-BFGS Method for Machine Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8ebda540cbcc4d7336496819a46a1b68-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8ebda540cbcc4d7336496819a46a1b68-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8ebda540cbcc4d7336496819a46a1b68-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8ebda540cbcc4d7336496819a46a1b68-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8ebda540cbcc4d7336496819a46a1b68-Reviews.html", "metareview": "", "pdf_size": 688884, "gs_citation": 193, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6965843680970132854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "Northwestern University; Northwestern University; Lehigh University", "aff_domain": "u.northwestern.edu;northwestern.edu;gmail.com", "email": "u.northwestern.edu;northwestern.edu;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8ebda540cbcc4d7336496819a46a1b68-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Northwestern University;Lehigh University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northwestern.edu;https://www.lehigh.edu", "aff_unique_abbr": "NU;Lehigh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "A Multi-step Inertial Forward-Backward Splitting Method for Non-convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7301", "id": "7301", "author_site": "Jingwei Liang, Jalal Fadili, Gabriel Peyr\u00e9", "author": "Jingwei Liang; Jalal Fadili; Gabriel Peyr\u00e9", "abstract": "In this paper, we propose a multi-step inertial Forward--Backward splitting algorithm for minimizing the sum of two non-necessarily convex functions, one of which is proper lower semi-continuous while the other is differentiable with a Lipschitz continuous gradient. We first prove global convergence of the scheme with the help of the Kurdyka\u2013\u0141ojasiewicz property. Then, when the non-smooth part is also partly smooth relative to a smooth submanifold, we establish finite identification of the latter and provide sharp local linear convergence analysis. The proposed method is illustrated on a few problems arising from statistics and machine learning.", "bibtex": "@inproceedings{NIPS2016_ea6b2efb,\n author = {Liang, Jingwei and Fadili, Jalal and Peyr\\'{e}, Gabriel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Multi-step Inertial Forward-Backward Splitting Method for Non-convex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ea6b2efbdd4255a9f1b3bbc6399b58f4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ea6b2efbdd4255a9f1b3bbc6399b58f4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ea6b2efbdd4255a9f1b3bbc6399b58f4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ea6b2efbdd4255a9f1b3bbc6399b58f4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ea6b2efbdd4255a9f1b3bbc6399b58f4-Reviews.html", "metareview": "", "pdf_size": 738678, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17055385817756533239&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Normandie Univ, ENSICAEN, CNRS, GREYC; Normandie Univ, ENSICAEN, CNRS, GREYC; CNRS, DMA, ENS Paris", "aff_domain": "greyc.ensicaen.fr;greyc.ensicaen.fr;ens.fr", "email": "greyc.ensicaen.fr;greyc.ensicaen.fr;ens.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ea6b2efbdd4255a9f1b3bbc6399b58f4-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Normandie University;\u00c9cole Normale Sup\u00e9rieure", "aff_unique_dep": "ENSICAEN;D\u00e9partement de Math\u00e9matiques et Applications", "aff_unique_url": "https://www.univ-normandie.fr;https://www.ens.fr", "aff_unique_abbr": "Univ Normandie;ENS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Paris", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "A Non-convex One-Pass Framework for Generalized Factorization Machine and Rank-One Matrix Sensing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7154", "id": "7154", "author_site": "Ming Lin, Jieping Ye", "author": "Ming Lin; Jieping Ye", "abstract": "We develop an efficient alternating framework for learning a generalized version of Factorization Machine (gFM) on steaming data with provable guarantees. When the instances are sampled from $d$ dimensional random Gaussian vectors and the target second order coefficient matrix in gFM is of rank $k$, our algorithm converges linearly, achieves $O(\\epsilon)$ recovery error after retrieving $O(k^{3}d\\log(1/\\epsilon))$ training instances, consumes $O(kd)$ memory in one-pass of dataset and only requires matrix-vector product operations in each iteration. The key ingredient of our framework is a construction of an estimation sequence endowed with a so-called Conditionally Independent RIP condition (CI-RIP). As special cases of gFM, our framework can be applied to symmetric or asymmetric rank-one matrix sensing problems, such as inductive matrix completion and phase retrieval.", "bibtex": "@inproceedings{NIPS2016_0a113ef6,\n author = {Lin, Ming and Ye, Jieping},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Non-convex One-Pass Framework for Generalized Factorization Machine and Rank-One Matrix Sensing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0a113ef6b61820daa5611c870ed8d5ee-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0a113ef6b61820daa5611c870ed8d5ee-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0a113ef6b61820daa5611c870ed8d5ee-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0a113ef6b61820daa5611c870ed8d5ee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0a113ef6b61820daa5611c870ed8d5ee-Reviews.html", "metareview": "", "pdf_size": 335547, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14405857570123244975&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Michigan; University of Michigan", "aff_domain": "umich.edu;umich.edu", "email": "umich.edu;umich.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0a113ef6b61820daa5611c870ed8d5ee-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Non-generative Framework and Convex Relaxations for Unsupervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6905", "id": "6905", "author_site": "Elad Hazan, Tengyu Ma", "author": "Elad Hazan; Tengyu Ma", "abstract": "We give a novel formal theoretical framework for unsupervised learning with two distinctive characteristics. First, it does not assume any generative model and based on a worst-case performance metric. Second, it is comparative, namely performance is measured with respect to a given hypothesis class. This allows to avoid known computational hardness results and improper algorithms based on convex relaxations. We show how several families of unsupervised learning models, which were previously only analyzed under probabilistic assumptions and are otherwise provably intractable, can be efficiently learned in our framework by convex optimization.", "bibtex": "@inproceedings{NIPS2016_be3e9d3f,\n author = {Hazan, Elad and Ma, Tengyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Non-generative Framework and Convex Relaxations for Unsupervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/be3e9d3f7d70537357c67bb3f4086846-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/be3e9d3f7d70537357c67bb3f4086846-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/be3e9d3f7d70537357c67bb3f4086846-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/be3e9d3f7d70537357c67bb3f4086846-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/be3e9d3f7d70537357c67bb3f4086846-Reviews.html", "metareview": "", "pdf_size": 430890, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17388765771045422828&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Princeton University; Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu", "email": "cs.princeton.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/be3e9d3f7d70537357c67bb3f4086846-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Non-parametric Learning Method for Confidently Estimating Patient's Clinical State and Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7152", "id": "7152", "author_site": "William Hoiles, Mihaela van der Schaar", "author": "William Hoiles; Mihaela van der Schaar", "abstract": "Estimating patient's clinical state from multiple concurrent physiological streams plays an important role in determining if a therapeutic intervention is necessary and for triaging patients in the hospital. In this paper we construct a non-parametric learning algorithm to estimate the clinical state of a patient. The algorithm addresses several known challenges with clinical state estimation such as eliminating bias introduced by therapeutic intervention censoring, increasing the timeliness of state estimation while ensuring a sufficient accuracy, and the ability to detect anomalous clinical states. These benefits are obtained by combining the tools of non-parametric Bayesian inference, permutation testing, and generalizations of the empirical Bernstein inequality. The algorithm is validated using real-world data from a cancer ward in a large academic hospital.", "bibtex": "@inproceedings{NIPS2016_062ddb6c,\n author = {Hoiles, William and van der Schaar, Mihaela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Non-parametric Learning Method for Confidently Estimating Patient\\textquotesingle s Clinical State and Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/062ddb6c727310e76b6200b7c71f63b5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/062ddb6c727310e76b6200b7c71f63b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/062ddb6c727310e76b6200b7c71f63b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/062ddb6c727310e76b6200b7c71f63b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/062ddb6c727310e76b6200b7c71f63b5-Reviews.html", "metareview": "", "pdf_size": 397056, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11223511530666905902&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Department of Electrical Engineering, University of California Los Angeles; Department of Electrical Engineering, University of California Los Angeles", "aff_domain": "ucla.edu;ee.ucla.edu", "email": "ucla.edu;ee.ucla.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/062ddb6c727310e76b6200b7c71f63b5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Powerful Generative Model Using Random Weights for the Deep Image Representation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8517", "id": "8517", "author_site": "Kun He, Yan Wang, John Hopcroft", "author": "Kun He; Yan Wang; John Hopcroft", "abstract": "To what extent is the success of deep visualization due to the training? Could we do deep visualization using untrained, random weight networks? To address this issue, we explore new and powerful generative models for three popular deep visualization tasks using untrained, random weight convolutional neural networks. First we invert representations in feature spaces and reconstruct images from white noise inputs. The reconstruction quality is statistically higher than that of the same method applied on well trained networks with the same architecture. Next we synthesize textures using scaled correlations of representations in multiple layers and our results are almost indistinguishable with the original natural texture and the synthesized textures based on the trained network. Third, by recasting the content of an image in the style of various artworks, we create artistic images with high perceptual quality, highly competitive to the prior work of Gatys et al. on pretrained networks. To our knowledge this is the first demonstration of image representations using untrained deep neural networks. Our work provides a new and fascinating tool to study the representation of deep network architecture and sheds light on new understandings on deep visualization. 
It may possibly lead to a way to compare network architectures without training.", "bibtex": "@inproceedings{NIPS2016_58238e9a,\n author = {He, Kun and Wang, Yan and Hopcroft, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Powerful Generative Model Using Random Weights for the Deep Image Representation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/58238e9ae2dd305d79c2ebc8c1883422-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/58238e9ae2dd305d79c2ebc8c1883422-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/58238e9ae2dd305d79c2ebc8c1883422-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/58238e9ae2dd305d79c2ebc8c1883422-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/58238e9ae2dd305d79c2ebc8c1883422-Reviews.html", "metareview": "", "pdf_size": 8985817, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13188388067164609320&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science and Technology, Huazhong University of Science and Technology; Department of Computer Science and Technology, Huazhong University of Science and Technology; Department of Computer Science, Cornell University", "aff_domain": "hust.edu.cn;hust.edu.cn;cs.cornell.edu", "email": "hust.edu.cn;hust.edu.cn;cs.cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/58238e9ae2dd305d79c2ebc8c1883422-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Huazhong University of Science and Technology;Cornell University", "aff_unique_dep": "Department of Computer Science and Technology;Department of Computer Science", "aff_unique_url": "http://www.hust.edu.cn;https://www.cornell.edu", "aff_unique_abbr": "HUST;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "title": "A Probabilistic Framework for Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7285", "id": "7285", "author_site": "Ankit Patel, Tan Nguyen, Richard Baraniuk", "author": "Ankit B Patel; Minh Tan Nguyen; Richard Baraniuk", "abstract": "We develop a probabilistic framework for deep learning based on the Deep Rendering Mixture Model (DRMM), a new generative probabilistic model that explicitly capture variations in data due to latent task nuisance variables. We demonstrate that max-sum inference in the DRMM yields an algorithm that exactly reproduces the operations in deep convolutional neural networks (DCNs), providing a first principles derivation. Our framework provides new insights into the successes and shortcomings of DCNs as well as a principled route to their improvement. DRMM training via the Expectation-Maximization (EM) algorithm is a powerful alternative to DCN back-propagation, and initial training results are promising. Classification based on the DRMM and other variants outperforms DCNs in supervised digit classification, training 2-3x faster while achieving similar accuracy. 
Moreover, the DRMM is applicable to semi-supervised and unsupervised learning tasks, achieving results that are state-of-the-art in several categories on the MNIST benchmark and comparable to state of the art on the CIFAR10 benchmark.", "bibtex": "@inproceedings{NIPS2016_c70daf24,\n author = {Patel, Ankit B and Nguyen, Minh Tan and Baraniuk, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Probabilistic Framework for Deep Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c70daf247944fe3add32218f914c75a6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c70daf247944fe3add32218f914c75a6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c70daf247944fe3add32218f914c75a6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c70daf247944fe3add32218f914c75a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c70daf247944fe3add32218f914c75a6-Reviews.html", "metareview": "", "pdf_size": 2719797, "gs_citation": 91, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12419299379862846464&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c70daf247944fe3add32218f914c75a6-Abstract.html" }, { "title": "A Probabilistic Model of Social Decision Making based on Reward Maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6902", "id": "6902", "author_site": "Koosha Khalvati, Seongmin A. Park, Jean-Claude Dreher, Rajesh PN Rao", "author": "Koosha Khalvati; Seongmin A. Park; Jean-Claude Dreher; Rajesh P. Rao", "abstract": "A fundamental problem in cognitive neuroscience is how humans make decisions, act, and behave in relation to other humans. Here we adopt the hypothesis that when we are in an interactive social setting, our brains perform Bayesian inference of the intentions and cooperativeness of others using probabilistic representations. We employ the framework of partially observable Markov decision processes (POMDPs) to model human decision making in a social context, focusing specifically on the volunteer's dilemma in a version of the classic Public Goods Game. We show that the POMDP model explains both the behavior of subjects as well as neural activity recorded using fMRI during the game. The decisions of subjects can be modeled across all trials using two interpretable parameters. Furthermore, the expected reward predicted by the model for each subject was correlated with the activation of brain areas related to reward expectation in social interactions. Our results suggest a probabilistic basis for human social decision making within the framework of expected reward maximization.", "bibtex": "@inproceedings{NIPS2016_1abb1e1e,\n author = {Khalvati, Koosha and Park, Seongmin A. and Dreher, Jean-Claude and Rao, Rajesh PN},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Probabilistic Model of Social Decision Making based on Reward Maximization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1abb1e1ea5f481b589da52303b091cbb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1abb1e1ea5f481b589da52303b091cbb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1abb1e1ea5f481b589da52303b091cbb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1abb1e1ea5f481b589da52303b091cbb-Reviews.html", "metareview": "", "pdf_size": 613655, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17758855755765102354&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Washington; CNRS UMR 5229, Institut des Sciences Cognitives Marc Jeannerod; CNRS UMR 5229, Institut des Sciences Cognitives Marc Jeannerod; Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;isc.cnrs.fr;isc.cnrs.fr;cs.washington.edu", "email": "cs.washington.edu;isc.cnrs.fr;isc.cnrs.fr;cs.washington.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1abb1e1ea5f481b589da52303b091cbb-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Washington;CNRS UMR 5229", "aff_unique_dep": "Department of Computer Science;Institut des Sciences Cognitives Marc Jeannerod", "aff_unique_url": "https://www.washington.edu;https://www.cnrs.fr", "aff_unique_abbr": "UW;CNRS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;France" }, { "title": "A Probabilistic Programming Approach To Probabilistic Data Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7271", "id": "7271", "author_site": "Feras Saad, Vikash Mansinghka", "author": "Feras Saad; Vikash K Mansinghka", "abstract": "Probabilistic techniques are central to data analysis, but different approaches can be challenging to apply, combine, and compare. This paper introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include discriminative machine learning, hierarchical Bayesian models, multivariate kernel methods, clustering algorithms, and arbitrary probabilistic programs. We demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling definition language and structured query language. The practical value is illustrated in two ways. First, the paper describes an analysis on a database of Earth satellites, which identifies records that probably violate Kepler\u2019s Third Law by composing causal probabilistic programs with non-parametric Bayes in 50 lines of probabilistic code. Second, it reports the lines of code and accuracy of CGPMs compared with baseline solutions from standard machine learning libraries.", "bibtex": "@inproceedings{NIPS2016_46072631,\n author = {Saad, Feras and Mansinghka, Vikash K},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Probabilistic Programming Approach To Probabilistic Data Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/46072631582fc240dd2674a7d063b040-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/46072631582fc240dd2674a7d063b040-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/46072631582fc240dd2674a7d063b040-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/46072631582fc240dd2674a7d063b040-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/46072631582fc240dd2674a7d063b040-Reviews.html", "metareview": "", "pdf_size": 642310, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14016173265457663040&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "MIT Probabilistic Computing Project; MIT Probabilistic Computing Project", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/46072631582fc240dd2674a7d063b040-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Probabilistic Computing Project", "aff_unique_url": "https://www.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A Pseudo-Bayesian Algorithm for Robust PCA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7310", "id": "7310", "author_site": "Tae-Hyun Oh, Yasuyuki Matsushita, In So Kweon, David Wipf", "author": "Tae-Hyun Oh; Yasuyuki Matsushita; In Kweon; David Wipf", "abstract": "Commonly used in many applications, robust PCA represents an algorithmic attempt to reduce the sensitivity of classical PCA to outliers. The basic idea is to learn a decomposition of some data matrix of interest into low rank and sparse components, the latter representing unwanted outliers. Although the resulting problem is typically NP-hard, convex relaxations provide a computationally-expedient alternative with theoretical support. However, in practical regimes performance guarantees break down and a variety of non-convex alternatives, including Bayesian-inspired models, have been proposed to boost estimation quality. Unfortunately though, without additional a priori knowledge none of these methods can significantly expand the critical operational range such that exact principal subspace recovery is possible. Into this mix we propose a novel pseudo-Bayesian algorithm that explicitly compensates for design weaknesses in many existing non-convex approaches leading to state-of-the-art performance with a sound analytical foundation.", "bibtex": "@inproceedings{NIPS2016_f2201f51,\n author = {Oh, Tae-Hyun and Matsushita, Yasuyuki and Kweon, In and Wipf, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Pseudo-Bayesian Algorithm for Robust PCA},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f2201f5191c4e92cc5af043eebfd0946-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f2201f5191c4e92cc5af043eebfd0946-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f2201f5191c4e92cc5af043eebfd0946-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f2201f5191c4e92cc5af043eebfd0946-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f2201f5191c4e92cc5af043eebfd0946-Reviews.html", "metareview": "", "pdf_size": 524488, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14671047072943740352&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Electrical Engineering, KAIST, Daejeon, South Korea; Multimedia Engineering, Osaka University, Osaka, Japan; Electrical Engineering, KAIST, Daejeon, South Korea; Microsoft Research, Beijing, China", "aff_domain": "gmail.com;ist.osaka-u.ac.jp;kaist.ac.kr;microsoft.com", "email": "gmail.com;ist.osaka-u.ac.jp;kaist.ac.kr;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f2201f5191c4e92cc5af043eebfd0946-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "KAIST;Osaka University;Microsoft", "aff_unique_dep": "Electrical Engineering;Multimedia Engineering;Microsoft Research", "aff_unique_url": "https://www.kaist.ac.kr;https://www.osaka-u.ac.jp;https://www.microsoft.com/en-us/research/group/microsoft-research-asia", "aff_unique_abbr": "KAIST;Osaka U;MSR", "aff_campus_unique_index": "0;1;0;2", "aff_campus_unique": "Daejeon;Osaka;Beijing", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "South Korea;Japan;China" }, { "title": "A Simple Practical Accelerated Method for Finite Sums", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6921", "id": "6921", "author": "Aaron Defazio", "abstract": "We describe a novel optimization method for finite sums (such as empirical risk minimization problems) building on the recently introduced SAGA method. Our method achieves an accelerated convergence rate on strongly convex smooth problems. Our method has only one parameter (a step size), and is radically simpler than other accelerated methods for finite sums. Additionally it can be applied when the terms are non-smooth, yielding a method applicable in many areas where operator splitting methods would traditionally be applied.", "bibtex": "@inproceedings{NIPS2016_4f6ffe13,\n author = {Defazio, Aaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Simple Practical Accelerated Method for Finite Sums},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4f6ffe13a5d75b2d6a3923922b3922e5-Reviews.html", "metareview": "", "pdf_size": 582117, "gs_citation": 169, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10342765975996242762&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Ambiata, Sydney Australia", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4f6ffe13a5d75b2d6a3923922b3922e5-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Ambiata", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "0", "aff_campus_unique": "Sydney", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "A Sparse Interactive Model for Matrix Completion with Side Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7319", "id": "7319", "author_site": "Jin Lu, Guannan Liang, Jiangwen Sun, Jinbo Bi", "author": "Jin Lu; Guannan Liang; Jiangwen Sun; Jinbo Bi", "abstract": "Matrix completion methods can benefit from side information besides the partially observed matrix. The use of side features describing the row and column entities of a matrix has been shown to reduce the sample complexity for completing the matrix. We propose a novel sparse formulation that explicitly models the interaction between the row and column side features to approximate the matrix entries. Unlike early methods, this model does not require the low-rank condition on the model parameter matrix. We prove that when the side features can span the latent feature space of the matrix to be recovered, the number of observed entries needed for an exact recovery is $O(\\log N)$ where $N$ is the size of the matrix. When the side features are corrupted latent features of the matrix with a small perturbation, our method can achieve an $\\epsilon$-recovery with $O(\\log N)$ sample complexity, and maintains a $O(N^{3/2})$ rate similar to classic methods with no side information. An efficient linearized Lagrangian algorithm is developed with a strong guarantee of convergence. Empirical results show that our approach outperforms three state-of-the-art methods both in simulations and on real world datasets.", "bibtex": "@inproceedings{NIPS2016_093b60fd,\n author = {Lu, Jin and Liang, Guannan and Sun, Jiangwen and Bi, Jinbo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Sparse Interactive Model for Matrix Completion with Side Information},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/093b60fd0557804c8ba0cbf1453da22f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/093b60fd0557804c8ba0cbf1453da22f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/093b60fd0557804c8ba0cbf1453da22f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/093b60fd0557804c8ba0cbf1453da22f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/093b60fd0557804c8ba0cbf1453da22f-Reviews.html", "metareview": "", "pdf_size": 488169, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8547157079017982417&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/093b60fd0557804c8ba0cbf1453da22f-Abstract.html" }, { "title": "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7250", "id": "7250", "author_site": "Yarin Gal, Zoubin Ghahramani", "author": "Yarin Gal; Zoubin Ghahramani", "abstract": "Recurrent neural networks (RNNs) stand at the forefront of many recent developments in deep learning. Yet a major difficulty with these models is their tendency to overfit, with dropout shown to fail when applied to recurrent layers. Recent results at the intersection of Bayesian modelling and deep learning offer a Bayesian interpretation of common deep learning techniques such as dropout. This grounding of dropout in approximate Bayesian inference suggests an extension of the theoretical results, offering insights into the use of dropout with RNN models. We apply this new variational inference based dropout technique in LSTM and GRU models, assessing it on language modelling and sentiment analysis tasks. The new approach outperforms existing techniques, and to the best of our knowledge improves on the single model state-of-the-art in language modelling with the Penn Treebank (73.4 test perplexity). This extends our arsenal of variational tools in deep learning.", "bibtex": "@inproceedings{NIPS2016_076a0c97,\n author = {Gal, Yarin and Ghahramani, Zoubin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Theoretically Grounded Application of Dropout in Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/076a0c97d09cf1a0ec3e19c7f2529f2b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/076a0c97d09cf1a0ec3e19c7f2529f2b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/076a0c97d09cf1a0ec3e19c7f2529f2b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/076a0c97d09cf1a0ec3e19c7f2529f2b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/076a0c97d09cf1a0ec3e19c7f2529f2b-Reviews.html", "metareview": "", "pdf_size": 911943, "gs_citation": 2144, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4962165342258626013&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of Cambridge; University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk", "email": "cam.ac.uk;cam.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/076a0c97d09cf1a0ec3e19c7f2529f2b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "A Unified Approach for Learning the Parameters of Sum-Product Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7345", "id": "7345", "author_site": "Han Zhao, Pascal Poupart, Geoffrey Gordon", "author": "Han Zhao; Pascal Poupart; Geoffrey J. Gordon", "abstract": "We present a unified approach for learning the parameters of Sum-Product networks (SPNs). We prove that any complete and decomposable SPN is equivalent to a mixture of trees where each tree corresponds to a product of univariate distributions. Based on the mixture model perspective, we characterize the objective function when learning SPNs based on the maximum likelihood estimation (MLE) principle and show that the optimization problem can be formulated as a signomial program. We construct two parameter learning algorithms for SPNs by using sequential monomial approximations (SMA) and the concave-convex procedure (CCCP), respectively. The two proposed methods naturally admit multiplicative updates, hence effectively avoiding the projection operation. With the help of the unified framework, we also show that, in the case of SPNs, CCCP leads to the same algorithm as Expectation Maximization (EM) despite the fact that they are different in general.", "bibtex": "@inproceedings{NIPS2016_6c9882bb,\n author = {Zhao, Han and Poupart, Pascal and Gordon, Geoffrey J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A Unified Approach for Learning the Parameters of Sum-Product Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6c9882bbac1c7093bd25041881277658-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6c9882bbac1c7093bd25041881277658-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6c9882bbac1c7093bd25041881277658-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6c9882bbac1c7093bd25041881277658-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6c9882bbac1c7093bd25041881277658-Reviews.html", "metareview": "", "pdf_size": 1181132, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6478592382459665385&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Machine Learning Dept., Carnegie Mellon University; School of Computer Science, University of Waterloo; Machine Learning Dept., Carnegie Mellon University", "aff_domain": "cs.cmu.edu;uwaterloo.ca;cs.cmu.edu", "email": "cs.cmu.edu;uwaterloo.ca;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6c9882bbac1c7093bd25041881277658-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;University of Waterloo", "aff_unique_dep": "Machine Learning Department;School of Computer Science", "aff_unique_url": "https://www.cmu.edu;https://uwaterloo.ca", "aff_unique_abbr": "CMU;UWaterloo", "aff_campus_unique_index": "1", "aff_campus_unique": ";Waterloo", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "title": "A forward model at Purkinje cell synapses facilitates cerebellar anticipatory control", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6894", "id": "6894", "author_site": "Ivan Herreros, Xerxes Arsiwalla, Paul Verschure", "author": "Ivan Herreros; Xerxes Arsiwalla; Paul Verschure", "abstract": "How does our motor system solve the problem of anticipatory control in spite of a wide spectrum of response dynamics from different musculo-skeletal systems, transport delays as well as response latencies throughout the central nervous system? To a great extent, our highly-skilled motor responses are a result of a reactive feedback system, originating in the brain-stem and spinal cord, combined with a feed-forward anticipatory system, that is adaptively fine-tuned by sensory experience and originates in the cerebellum. Based on that interaction we design the counterfactual predictive control (CFPC) architecture, an anticipatory adaptive motor control scheme in which a feed-forward module, based on the cerebellum, steers an error feedback controller with counterfactual error signals. Those are signals that trigger reactions as actual errors would, but that do not code for any current or forthcoming errors. In order to determine the optimal learning strategy, we derive a novel learning rule for the feed-forward module that involves an eligibility trace and operates at the synaptic level. In particular, our eligibility trace provides a mechanism beyond coincidence detection in that it convolves a history of prior synaptic inputs with error signals. 
In the context of cerebellar physiology, this solution implies that Purkinje cell synapses should generate eligibility traces using a forward model of the system being controlled. From an engineering perspective, CFPC provides a general-purpose anticipatory control architecture equipped with a learning rule that exploits the full dynamics of the closed-loop system.", "bibtex": "@inproceedings{NIPS2016_65699726,\n author = {Herreros, Ivan and Arsiwalla, Xerxes and Verschure, Paul},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A forward model at Purkinje cell synapses facilitates cerebellar anticipatory control},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/65699726a3c601b9f31bf04019c8593c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/65699726a3c601b9f31bf04019c8593c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/65699726a3c601b9f31bf04019c8593c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/65699726a3c601b9f31bf04019c8593c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/65699726a3c601b9f31bf04019c8593c-Reviews.html", "metareview": "", "pdf_size": 1082661, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3225966154178060252&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "SPECS lab, Universitat Pompeu Fabra, Barcelona, Spain; SPECS lab, Universitat Pompeu Fabra, Barcelona, Spain; SPECS, UPF, Catalan Institution of Research and Advanced Studies (ICREA), Barcelona, Spain", "aff_domain": "upf.edu; ; ", "email": "upf.edu; ; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/65699726a3c601b9f31bf04019c8593c-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Universitat Pompeu Fabra;Universitat Pompeu Fabra (UPF)", "aff_unique_dep": "SPECS lab;SPECS", "aff_unique_url": "https://www.upf.edu;https://www.upf.edu", "aff_unique_abbr": ";UPF", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Barcelona", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "title": "A posteriori error bounds for joint matrix decomposition problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8496", "id": "8496", "author_site": "Nicol\u00f2 Colombo, Nikos Vlassis", "author": "Nicolo Colombo; Nikos Vlassis", "abstract": "Joint matrix triangularization is often used for estimating the joint eigenstructure of a set M of matrices, with applications in signal processing and machine learning. We consider the problem of approximate joint matrix triangularization when the matrices in M are jointly diagonalizable and real, but we only observe a set M' of noise perturbed versions of the matrices in M. Our main result is a first-order upper bound on the distance between any approximate joint triangularizer of the matrices in M' and any exact joint triangularizer of the matrices in M. The bound depends only on the observable matrices in M' and the noise level. In particular, it does not depend on optimization specific properties of the triangularizer, such as its proximity to critical points, that are typical of existing bounds in the literature. 
To our knowledge, this is the first a posteriori bound for joint matrix decomposition. We demonstrate the bound on synthetic data for which the ground truth is known.", "bibtex": "@inproceedings{NIPS2016_7884a965,\n author = {Colombo, Nicolo and Vlassis, Nikos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A posteriori error bounds for joint matrix decomposition problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7884a9652e94555c70f96b6be63be216-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7884a9652e94555c70f96b6be63be216-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7884a9652e94555c70f96b6be63be216-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7884a9652e94555c70f96b6be63be216-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7884a9652e94555c70f96b6be63be216-Reviews.html", "metareview": "", "pdf_size": 302844, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1169372603069940651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Statistical Science, University College London; Adobe Research, San Jose, CA", "aff_domain": "ucl.ac.uk;adobe.com", "email": "ucl.ac.uk;adobe.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7884a9652e94555c70f96b6be63be216-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University College London;Adobe", "aff_unique_dep": "Department of Statistical Science;Adobe Research", "aff_unique_url": "https://www.ucl.ac.uk;https://research.adobe.com", "aff_unique_abbr": "UCL;Adobe", "aff_campus_unique_index": "0;1", "aff_campus_unique": "London;San Jose", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "A primal-dual method for conic constrained distributed optimization problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7142", "id": "7142", "author_site": "Necdet Serhat Aybat, Erfan Yazdandoost Hamedani", "author": "Necdet Serhat Aybat; Erfan Yazdandoost Hamedani", "abstract": "We consider cooperative multi-agent consensus optimization problems over an undirected network of agents, where only those agents connected by an edge can directly communicate. The objective is to minimize the sum of agent-specific composite convex functions over agent-specific private conic constraint sets; hence, the optimal consensus decision should lie in the intersection of these private sets. We provide convergence rates in sub-optimality, infeasibility and consensus violation; examine the effect of underlying network topology on the convergence rates of the proposed decentralized algorithms; and show how to extend these methods to handle time-varying communication networks.", "bibtex": "@inproceedings{NIPS2016_743c41a9,\n author = {Aybat, Necdet Serhat and Yazdandoost Hamedani, Erfan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A primal-dual method for conic constrained distributed optimization problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/743c41a921516b04afde48bb48e28ce6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/743c41a921516b04afde48bb48e28ce6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/743c41a921516b04afde48bb48e28ce6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/743c41a921516b04afde48bb48e28ce6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/743c41a921516b04afde48bb48e28ce6-Reviews.html", "metareview": "", "pdf_size": 798868, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4297772062039488794&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Industrial Engineering, Penn State University; Department of Industrial Engineering, Penn State University", "aff_domain": "psu.edu;psu.edu", "email": "psu.edu;psu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/743c41a921516b04afde48bb48e28ce6-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Penn State University", "aff_unique_dep": "Department of Industrial Engineering", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A scalable end-to-end Gaussian process adapter for irregularly sampled time series classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7120", "id": "7120", "author_site": "Steven Cheng-Xian Li, Benjamin Marlin", "author": "Steven Cheng-Xian Li; Benjamin M. Marlin", "abstract": "We present a general framework for classification of sparse and irregularly-sampled time series. The properties of such time series can result in substantial uncertainty about the values of the underlying temporal processes, while making the data difficult to deal with using standard classification methods that assume fixed-dimensional feature spaces. To address these challenges, we propose an uncertainty-aware classification framework based on a special computational layer we refer to as the Gaussian process adapter that can connect irregularly sampled time series data to any black-box classifier learnable using gradient descent. We show how to scale up the required computations based on combining the structured kernel interpolation framework and the Lanczos approximation method, and how to discriminatively train the Gaussian process adapter in combination with a number of classifiers end-to-end using backpropagation.", "bibtex": "@inproceedings{NIPS2016_9c01802d,\n author = {Li, Steven Cheng-Xian and Marlin, Benjamin M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A scalable end-to-end Gaussian process adapter for irregularly sampled time series classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9c01802ddb981e6bcfbec0f0516b8e35-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9c01802ddb981e6bcfbec0f0516b8e35-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9c01802ddb981e6bcfbec0f0516b8e35-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9c01802ddb981e6bcfbec0f0516b8e35-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9c01802ddb981e6bcfbec0f0516b8e35-Reviews.html", "metareview": "", "pdf_size": 432143, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9464341548908737511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9c01802ddb981e6bcfbec0f0516b8e35-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "College of Information and Computer Sciences", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "A scaled Bregman theorem with applications", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7289", "id": "7289", "author_site": "Richard Nock, Aditya Menon, Cheng Soon Ong", "author": "Richard Nock; Aditya Menon; Cheng Soon Ong", "abstract": "Bregman divergences play a central role in the design and analysis of a range of machine learning algorithms through a handful of popular theorems. We present a new theorem which shows that ``Bregman distortions'' (employing a potentially non-convex generator) may be exactly re-written as a scaled Bregman divergence computed over transformed data. This property can be viewed from the standpoints of geometry (a scaled isometry with adaptive metrics) or convex optimization (relating generalized perspective transforms). Admissible distortions include geodesic distances on curved manifolds and projections or gauge-normalisation. Our theorem allows one to leverage the wealth and convenience of Bregman divergences when analysing algorithms relying on the aforementioned Bregman distortions. We illustrate this with three novel applications of our theorem: a reduction from multi-class density ratio to class-probability estimation, a new adaptive projection free yet norm-enforcing dual norm mirror descent algorithm, and a reduction from clustering on flat manifolds to clustering on curved manifolds. 
Experiments on each of these domains validate the analyses and suggest that the scaled Bregman theorem might be a worthy addition to the popular handful of Bregman divergence properties that have been pervasive in machine learning.", "bibtex": "@inproceedings{NIPS2016_c9f0f895,\n author = {Nock, Richard and Menon, Aditya and Ong, Cheng Soon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A scaled Bregman theorem with applications},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c9f0f895fb98ab9159f51fd0297e236d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c9f0f895fb98ab9159f51fd0297e236d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c9f0f895fb98ab9159f51fd0297e236d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c9f0f895fb98ab9159f51fd0297e236d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c9f0f895fb98ab9159f51fd0297e236d-Reviews.html", "metareview": "", "pdf_size": 1090878, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3467571578529843723&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "Data61+the Australian National University+the University of Sydney; Data61+the Australian National University; Data61+the Australian National University", "aff_domain": "data61.csiro.au;data61.csiro.au;data61.csiro.au", "email": "data61.csiro.au;data61.csiro.au;data61.csiro.au", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c9f0f895fb98ab9159f51fd0297e236d-Abstract.html", "aff_unique_index": "0+1+2;0+1;0+1", "aff_unique_norm": "Data61;Australian National University;University of Sydney", "aff_unique_dep": ";;", "aff_unique_url": "https://data61.csiro.au;https://www.anu.edu.au;https://www.sydney.edu.au", "aff_unique_abbr": "Data61;ANU;USYD", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0+0;0+0;0+0", "aff_country_unique": "Australia" }, { "title": "A state-space model of cross-region dynamic connectivity in MEG/EEG", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7054", "id": "7054", "author_site": "Ying Yang, Elissa Aminoff, Michael Tarr, Robert E Kass", "author": "Ying Yang; Elissa Aminoff; Michael Tarr; Robert E Kass", "abstract": "Cross-region dynamic connectivity, which describes spatio-temporal dependence of neural activity among multiple brain regions of interest (ROIs), can provide important information for understanding cognition. For estimating such connectivity, magnetoencephalography (MEG) and electroencephalography (EEG) are well-suited tools because of their millisecond temporal resolution. However, localizing source activity in the brain requires solving an under-determined linear problem. In typical two-step approaches, researchers first solve the linear problem with general priors assuming independence across ROIs, and secondly quantify cross-region connectivity. In this work, we propose a one-step state-space model to improve estimation of dynamic connectivity. The model treats the mean activity in individual ROIs as the state variable, and describes non-stationary dynamic dependence across ROIs using time-varying auto-regression. 
Compared with a two-step method, which first obtains the commonly used minimum-norm estimates of source activity, and then fits the auto-regressive model, our state-space model yielded smaller estimation errors on simulated data where the model assumptions held. When applied on empirical MEG data from one participant in a scene-processing experiment, our state-space model also demonstrated intriguing preliminary results, indicating leading and lagged linear dependence between the early visual cortex and a higher-level scene-sensitive region, which could reflect feed-forward and feedback information flow within the visual cortex during scene processing.", "bibtex": "@inproceedings{NIPS2016_9f396fe4,\n author = {Yang, Ying and Aminoff, Elissa and Tarr, Michael and Kass, Robert E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {A state-space model of cross-region dynamic connectivity in MEG/EEG},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9f396fe44e7c05c16873b05ec425cbad-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9f396fe44e7c05c16873b05ec425cbad-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9f396fe44e7c05c16873b05ec425cbad-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9f396fe44e7c05c16873b05ec425cbad-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9f396fe44e7c05c16873b05ec425cbad-Reviews.html", "metareview": "", "pdf_size": 762914, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10025164854236105873&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9f396fe44e7c05c16873b05ec425cbad-Abstract.html" }, { "title": "Accelerating Stochastic Composition Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7395", "id": "7395", "author_site": "Mengdi Wang, Ji Liu, Ethan Fang", "author": "Mengdi Wang; Ji Liu; Ethan Fang", "abstract": "Consider the stochastic composition optimization problem where the objective is a composition of two expected-value functions. We propose a new stochastic first-order method, namely the accelerated stochastic compositional proximal gradient (ASC-PG) method, which updates based on queries to the sampling oracle using two different timescales. The ASC-PG is the first proximal gradient method for the stochastic composition problem that can deal with nonsmooth regularization penalty. We show that the ASC-PG exhibits faster convergence than the best known algorithms, and that it achieves the optimal sample-error complexity in several important special cases. We further demonstrate the application of ASC-PG to reinforcement learning and conduct numerical experiments.", "bibtex": "@inproceedings{NIPS2016_92262bf9,\n author = {Wang, Mengdi and Liu, Ji and Fang, Ethan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Accelerating Stochastic Composition Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/92262bf907af914b95a0fc33c3f33bf6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/92262bf907af914b95a0fc33c3f33bf6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/92262bf907af914b95a0fc33c3f33bf6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/92262bf907af914b95a0fc33c3f33bf6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/92262bf907af914b95a0fc33c3f33bf6-Reviews.html", "metareview": "", "pdf_size": 481451, "gs_citation": 178, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8919065841152724134&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 18, "aff": "Princeton University; University of Rochester; Pennsylvania State University", "aff_domain": "princeton.edu;gmail.com;psu.edu", "email": "princeton.edu;gmail.com;psu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/92262bf907af914b95a0fc33c3f33bf6-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Princeton University;University of Rochester;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;https://www.rochester.edu;https://www.psu.edu", "aff_unique_abbr": "Princeton;U of R;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Achieving budget-optimality with adaptive schemes in crowdsourcing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7156", "id": "7156", "author_site": "Ashish Khetan, Sewoong Oh", "author": "Ashish Khetan; Sewoong Oh", "abstract": "Adaptive schemes, where tasks are assigned based on the data collected thus far, are widely used in practical crowdsourcing systems to efficiently allocate the budget. However, existing theoretical analyses of crowdsourcing systems suggest that the gain of adaptive task assignments is minimal. To bridge this gap, we investigate this question under a strictly more general probabilistic model, which has been recently introduced to model practical crowdsourcing data sets. Under this generalized Dawid-Skene model, we characterize the fundamental trade-off between budget and accuracy, and introduce a novel adaptive scheme that matches this fundamental limit. We further quantify the gain of adaptivity, by comparing the trade-off with the one for non-adaptive schemes, and confirm that the gain is significant and can be made arbitrarily large depending on the distribution of the difficulty level of the tasks at hand.", "bibtex": "@inproceedings{NIPS2016_03e7ef47,\n author = {Khetan, Ashish and Oh, Sewoong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Achieving budget-optimality with adaptive schemes in crowdsourcing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/03e7ef47cee6fa4ae7567394b99912b7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/03e7ef47cee6fa4ae7567394b99912b7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/03e7ef47cee6fa4ae7567394b99912b7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/03e7ef47cee6fa4ae7567394b99912b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/03e7ef47cee6fa4ae7567394b99912b7-Reviews.html", "metareview": "", "pdf_size": 307559, "gs_citation": 86, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2503478038320431132&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of ISE, University of Illinois at Urbana-Champaign; Department of ISE, University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/03e7ef47cee6fa4ae7567394b99912b7-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Industrial and Systems Engineering", "aff_unique_url": "https://www.illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Achieving the KS threshold in the general stochastic block model with linearized acyclic belief propagation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7414", "id": "7414", "author_site": "Emmanuel Abbe, Colin Sandon", "author": "Emmanuel Abbe; Colin Sandon", "abstract": "The stochastic block model (SBM) has long been studied in machine learning and network science as a canonical model for clustering and community detection. In recent years, new developments have demonstrated the presence of threshold phenomena for this model, which have set new challenges for algorithms. For the {\\it detection} problem in symmetric SBMs, Decelle et al.\\ conjectured that the so-called Kesten-Stigum (KS) threshold can be achieved efficiently. This was proved for two communities, but remained open for three communities. We prove this conjecture here, obtaining a more general result that applies to arbitrary SBMs with linear size communities. The developed algorithm is a linearized acyclic belief propagation (ABP) algorithm, which mitigates the effects of cycles while provably achieving the KS threshold in $O(n \\ln n)$ time. This extends prior methods by achieving universally the KS threshold while reducing or preserving the computational complexity. ABP is also connected to a power iteration method on a generalized nonbacktracking operator, formalizing the spectral-message passing interplay described in Krzakala et al., and extending results from Bordenave et al.", "bibtex": "@inproceedings{NIPS2016_6c29793a,\n author = {Abbe, Emmanuel and Sandon, Colin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Achieving the KS threshold in the general stochastic block model with linearized acyclic belief propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6c29793a140a811d0c45ce03c1c93a28-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6c29793a140a811d0c45ce03c1c93a28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6c29793a140a811d0c45ce03c1c93a28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6c29793a140a811d0c45ce03c1c93a28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6c29793a140a811d0c45ce03c1c93a28-Reviews.html", "metareview": "", "pdf_size": 262738, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9141403368649917305&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Applied and Computational Mathematics and EE Dept., Princeton University; Department of Mathematics, Princeton University", "aff_domain": "princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6c29793a140a811d0c45ce03c1c93a28-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Applied and Computational Mathematics and Electrical Engineering", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Active Learning from Imperfect Labelers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6972", "id": "6972", "author_site": "Songbai Yan, Kamalika Chaudhuri, Tara Javidi", "author": "Songbai Yan; Kamalika Chaudhuri; Tara Javidi", "abstract": "We study active learning where the labeler can not only return incorrect labels but also abstain from labeling. We consider different noise and abstention conditions of the labeler. We propose an algorithm which utilizes abstention responses, and analyze its statistical consistency and query complexity under fairly natural assumptions on the noise and abstention rate of the labeler. This algorithm is adaptive in the sense that it can automatically request fewer queries with a more informed or less noisy labeler. We couple our algorithm with lower bounds to show that under some technical conditions, it achieves nearly optimal query complexity.", "bibtex": "@inproceedings{NIPS2016_dd77279f,\n author = {Yan, Songbai and Chaudhuri, Kamalika and Javidi, Tara},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Active Learning from Imperfect Labelers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dd77279f7d325eec933f05b1672f6a1f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dd77279f7d325eec933f05b1672f6a1f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dd77279f7d325eec933f05b1672f6a1f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dd77279f7d325eec933f05b1672f6a1f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dd77279f7d325eec933f05b1672f6a1f-Reviews.html", "metareview": "", "pdf_size": 223154, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1610604259229687874&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 12, "aff": "University of California, San Diego; University of California, San Diego; University of California, San Diego", "aff_domain": "eng.ucsd.edu;cs.ucsd.edu;eng.ucsd.edu", "email": "eng.ucsd.edu;cs.ucsd.edu;eng.ucsd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dd77279f7d325eec933f05b1672f6a1f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Active Learning with Oracle Epiphany", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6903", "id": "6903", "author_site": "Tzu-Kuo Huang, Lihong Li, Ara Vartanian, Saleema Amershi, Jerry Zhu", "author": "Tzu-Kuo Huang; Lihong Li; Ara Vartanian; Saleema Amershi; Xiaojin Zhu", "abstract": "We present a theoretical analysis of active learning with more realistic interactions with human oracles. Previous empirical studies have shown oracles abstaining on difficult queries until accumulating enough information to make label decisions. We formalize this phenomenon with an \u201coracle epiphany model\u201d and analyze active learning query complexity under such oracles for both the realizable and the agnostic cases. Our analysis shows that active learning is possible with oracle epiphany, but incurs an additional cost depending on when the epiphany happens. Our results suggest new, principled active learning approaches with realistic oracles.", "bibtex": "@inproceedings{NIPS2016_299fb214,\n author = {Huang, Tzu-Kuo and Li, Lihong and Vartanian, Ara and Amershi, Saleema and Zhu, Jerry},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Active Learning with Oracle Epiphany},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/299fb2142d7de959380f91c01c3a293c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/299fb2142d7de959380f91c01c3a293c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/299fb2142d7de959380f91c01c3a293c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/299fb2142d7de959380f91c01c3a293c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/299fb2142d7de959380f91c01c3a293c-Reviews.html", "metareview": "", "pdf_size": 323517, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9865143078326625288&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Uber Advanced Technologies Group, Pittsburgh, PA 15201; Microsoft Research, Redmond, WA 98052; University of Wisconsin\u2013Madison, Madison, WI 53706; Microsoft Research, Redmond, WA 98052; University of Wisconsin\u2013Madison, Madison, WI 53706", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/299fb2142d7de959380f91c01c3a293c-Abstract.html", "aff_unique_index": "0;1;2;1;2", "aff_unique_norm": "Uber Advanced Technologies Group;Microsoft;University of Wisconsin\u2013Madison", "aff_unique_dep": "Advanced Technologies Group;Microsoft Research;", "aff_unique_url": "https://www.uber.com;https://www.microsoft.com/en-us/research;https://www.wisc.edu", "aff_unique_abbr": "Uber ATG;MSR;UW\u2013Madison", "aff_campus_unique_index": "0;1;2;1;2", "aff_campus_unique": "Pittsburgh;Redmond;Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Active Nearest-Neighbor Learning in Metric Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7375", "id": "7375", "author_site": "Aryeh Kontorovich, Sivan Sabato, Ruth Urner", "author": "Aryeh Kontorovich; Sivan Sabato; Ruth Urner", "abstract": "We propose a pool-based non-parametric active learning algorithm for general metric spaces, called MArgin Regularized Metric Active Nearest Neighbor (MARMANN), which outputs a nearest-neighbor classifier. We give prediction error guarantees that depend on the noisy-margin properties of the input sample, and are competitive with those obtained by previously proposed passive learners. We prove that the label complexity of MARMANN is significantly lower than that of any passive learner with similar error guarantees. Our algorithm is based on a generalized sample compression scheme and a new label-efficient active model-selection procedure.", "bibtex": "@inproceedings{NIPS2016_13f320e7,\n author = {Kontorovich, Aryeh and Sabato, Sivan and Urner, Ruth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Active Nearest-Neighbor Learning in Metric Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/13f320e7b5ead1024ac95c3b208610db-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/13f320e7b5ead1024ac95c3b208610db-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/13f320e7b5ead1024ac95c3b208610db-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/13f320e7b5ead1024ac95c3b208610db-Reviews.html", "metareview": "", "pdf_size": 341826, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14008372053814214608&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 12, "aff": "Department of Computer Science, Ben-Gurion University of the Negev; Department of Computer Science, Ben-Gurion University of the Negev; Max Planck Institute for Intelligent Systems, Department for Empirical Inference", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/13f320e7b5ead1024ac95c3b208610db-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Ben-Gurion University of the Negev;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Department of Computer Science;Department for Empirical Inference", "aff_unique_url": "https://www.bgu.ac.il;https://www.mpituebingen.mpg.de", "aff_unique_abbr": "BGU;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;Germany" }, { "title": "Adaptive Averaging in Accelerated Descent Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7342", "id": "7342", "author_site": "Walid Krichene, Alexandre Bayen, Peter Bartlett", "author": "Walid Krichene; Alexandre Bayen; Peter L Bartlett", "abstract": "We study accelerated descent dynamics for constrained convex optimization. This dynamics can be described naturally as a coupling of a dual variable accumulating gradients at a given rate $\\eta(t)$, and a primal variable obtained as the weighted average of the mirrored dual trajectory, with weights $w(t)$. Using a Lyapunov argument, we give sufficient conditions on $\\eta$ and $w$ to achieve a desired convergence rate. As an example, we show that the replicator dynamics (an example of mirror descent on the simplex) can be accelerated using a simple averaging scheme. We then propose an adaptive averaging heuristic which adaptively computes the weights to speed up the decrease of the Lyapunov function. We provide guarantees on adaptive averaging in continuous-time, prove that it preserves the quadratic convergence rate of accelerated first-order methods in discrete-time, and give numerical experiments to compare it with existing heuristics, such as adaptive restarting. The experiments indicate that adaptive averaging performs at least as well as adaptive restarting, with significant improvements in some cases.", "bibtex": "@inproceedings{NIPS2016_1714726c,\n author = {Krichene, Walid and Bayen, Alexandre and Bartlett, Peter L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Averaging in Accelerated Descent Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1714726c817af50457d810aae9d27a2e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1714726c817af50457d810aae9d27a2e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1714726c817af50457d810aae9d27a2e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1714726c817af50457d810aae9d27a2e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1714726c817af50457d810aae9d27a2e-Reviews.html", "metareview": "", "pdf_size": 1368732, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11960198370193816409&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "UC Berkeley + Google; UC Berkeley; UC Berkeley and QUT", "aff_domain": "eecs.berkeley.edu;berkeley.edu;cs.berkeley.edu", "email": "eecs.berkeley.edu;berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1714726c817af50457d810aae9d27a2e-Abstract.html", "aff_unique_index": "0+1;0;0", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0+1;0;0", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive Concentration Inequalities for Sequential Decision Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7193", "id": "7193", "author_site": "Shengjia Zhao, Enze Zhou, Ashish Sabharwal, Stefano Ermon", "author": "Shengjia Zhao; Enze Zhou; Ashish Sabharwal; Stefano Ermon", "abstract": "A key challenge in sequential decision problems is to determine how many samples are needed for an agent to make reliable decisions with good probabilistic guarantees. We introduce Hoeffding-like concentration inequalities that hold for a random, adaptively chosen number of samples. Our inequalities are tight under natural assumptions and can greatly simplify the analysis of common sequential decision problems. In particular, we apply them to sequential hypothesis testing, best arm identification, and sorting. The resulting algorithms rival or exceed the state of the art both theoretically and empirically.", "bibtex": "@inproceedings{NIPS2016_2e65f2f2,\n author = {Zhao, Shengjia and Zhou, Enze and Sabharwal, Ashish and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Concentration Inequalities for Sequential Decision Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2e65f2f2fdaf6c699b223c61b1b5ab89-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2e65f2f2fdaf6c699b223c61b1b5ab89-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2e65f2f2fdaf6c699b223c61b1b5ab89-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2e65f2f2fdaf6c699b223c61b1b5ab89-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2e65f2f2fdaf6c699b223c61b1b5ab89-Reviews.html", "metareview": "", "pdf_size": 719237, "gs_citation": 59, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1593152435576043394&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Tsinghua University; Tsinghua University; Allen Institute for AI; Stanford University", "aff_domain": "stanford.edu;126.com;allenai.org;cs.stanford.edu", "email": "stanford.edu;126.com;allenai.org;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2e65f2f2fdaf6c699b223c61b1b5ab89-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Tsinghua University;Allen Institute for AI;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://allenai.org;https://www.stanford.edu", "aff_unique_abbr": "THU;AI2;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Adaptive Maximization of Pointwise Submodular Functions With Budget Constraint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7036", "id": "7036", "author_site": "Nguyen Viet Cuong, Huan Xu", "author": "Nguyen Cuong; Huan Xu", "abstract": "We study the worst-case adaptive optimization problem with a budget constraint, which is useful for modeling various practical applications in artificial intelligence and machine learning. We investigate the near-optimality of greedy algorithms for this problem with both modular and non-modular cost functions. In both cases, we prove that two simple greedy algorithms are not near-optimal, but the better of the two is near-optimal if the utility function satisfies pointwise submodularity and pointwise cost-sensitive submodularity, respectively. This implies a combined algorithm that is near-optimal with respect to the optimal algorithm that uses half of the budget. We discuss applications of our theoretical results and also report experiments comparing the greedy algorithms on the active learning problem.", "bibtex": "@inproceedings{NIPS2016_9fe8593a,\n author = {Cuong, Nguyen and Xu, Huan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Maximization of Pointwise Submodular Functions With Budget Constraint},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9fe8593a8a330607d76796b35c64c600-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9fe8593a8a330607d76796b35c64c600-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9fe8593a8a330607d76796b35c64c600-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9fe8593a8a330607d76796b35c64c600-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9fe8593a8a330607d76796b35c64c600-Reviews.html", "metareview": "", "pdf_size": 269889, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1324978708709774616&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Engineering, University of Cambridge; Stewart School of Industrial & Systems Engineering, Georgia Institute of Technology", "aff_domain": "cam.ac.uk;isye.gatech.edu", "email": "cam.ac.uk;isye.gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9fe8593a8a330607d76796b35c64c600-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;Georgia Institute of Technology", "aff_unique_dep": "Department of Engineering;Stewart School of Industrial & Systems Engineering", "aff_unique_url": "https://www.cam.ac.uk;https://www.gatech.edu", "aff_unique_abbr": "Cambridge;Georgia Tech", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Cambridge;Atlanta", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Adaptive Neural Compilation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7287", "id": "7287", "author_site": "Rudy Bunel, Alban Desmaison, Pawan K Mudigonda, Pushmeet Kohli, Philip Torr", "author": "Rudy R Bunel; Alban Desmaison; Pawan K Mudigonda; Pushmeet Kohli; Philip Torr", "abstract": "This paper proposes an adaptive neural-compilation framework to address the problem of learning efficient programs. Traditional code optimisation strategies used in compilers are based on applying a pre-specified set of transformations that make the code faster to execute without changing its semantics. In contrast, our work involves adapting programs to make them more efficient while considering correctness only on a target input distribution. Our approach is inspired by the recent works on differentiable representations of programs. We show that it is possible to compile programs written in a low-level language to a differentiable representation. We also show how programs in this representation can be optimised to make them efficient on a target distribution of inputs. Experimental results demonstrate that our approach enables learning specifically-tuned algorithms for given data distributions with a high success rate.", "bibtex": "@inproceedings{NIPS2016_f0adc883,\n author = {Bunel, Rudy R and Desmaison, Alban and Mudigonda, Pawan K and Kohli, Pushmeet and Torr, Philip},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Neural Compilation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f0adc8838f4bdedde4ec2cfad0515589-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f0adc8838f4bdedde4ec2cfad0515589-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f0adc8838f4bdedde4ec2cfad0515589-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f0adc8838f4bdedde4ec2cfad0515589-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f0adc8838f4bdedde4ec2cfad0515589-Reviews.html", "metareview": "", "pdf_size": 317014, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7659243913910635715&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Oxford; University of Oxford; Microsoft Research; University of Oxford; University of Oxford", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;microsoft.com;eng.ox.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk;microsoft.com;eng.ox.ac.uk;robots.ox.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f0adc8838f4bdedde4ec2cfad0515589-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Oxford;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ox.ac.uk;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Oxford;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Adaptive Newton Method for Empirical Risk Minimization to Statistical Accuracy", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7124", "id": "7124", "author_site": "Aryan Mokhtari, Hadi Daneshmand, Aurelien Lucchi, Thomas Hofmann, Alejandro Ribeiro", "author": "Aryan Mokhtari; Hadi Daneshmand; Aurelien Lucchi; Thomas Hofmann; Alejandro Ribeiro", "abstract": "We consider empirical risk minimization for large-scale datasets. We introduce Ada Newton as an adaptive algorithm that uses Newton's method with adaptive sample sizes. The main idea of Ada Newton is to increase the size of the training set by a factor larger than one in a way that the minimization variable for the current training set is in the local neighborhood of the optimal argument of the next training set. This allows us to exploit the quadratic convergence property of Newton's method and reach the statistical accuracy of each training set with only one iteration of Newton's method. We show theoretically that we can iteratively increase the sample size while applying single Newton iterations without line search and staying within the statistical accuracy of the regularized empirical risk. In particular, we can double the size of the training set in each iteration when the number of samples is sufficiently large. 
Numerical experiments on various datasets confirm the possibility of increasing the sample size by a factor of 2 at each iteration, which implies that Ada Newton achieves the statistical accuracy of the full training set with about two passes over the dataset.", "bibtex": "@inproceedings{NIPS2016_9f62b862,\n author = {Mokhtari, Aryan and Daneshmand, Hadi and Lucchi, Aurelien and Hofmann, Thomas and Ribeiro, Alejandro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Newton Method for Empirical Risk Minimization to Statistical Accuracy},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9f62b8625f914a002496335037e9ad97-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9f62b8625f914a002496335037e9ad97-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9f62b8625f914a002496335037e9ad97-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9f62b8625f914a002496335037e9ad97-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9f62b8625f914a002496335037e9ad97-Reviews.html", "metareview": "", "pdf_size": 315195, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14874161381996654814&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of Pennsylvania; ETH Zurich, Switzerland; ETH Zurich, Switzerland; ETH Zurich, Switzerland; University of Pennsylvania", "aff_domain": "seas.upenn.edu;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;seas.upenn.edu", "email": "seas.upenn.edu;inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;seas.upenn.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9f62b8625f914a002496335037e9ad97-Abstract.html", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of Pennsylvania;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.ethz.ch", "aff_unique_abbr": "UPenn;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0", "aff_country_unique": "United States;Switzerland" }, { "title": "Adaptive Skills Adaptive Partitions (ASAP)", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6981", "id": "6981", "author_site": "Daniel J Mankowitz, Timothy A Mann, Shie Mannor", "author": "Daniel J Mankowitz; Timothy A Mann; Shie Mannor", "abstract": "We introduce the Adaptive Skills, Adaptive Partitions (ASAP) framework that (1) learns skills (i.e., temporally extended actions or options) as well as (2) where to apply them. We believe that both (1) and (2) are necessary for a truly general skill learning framework, which is a key building block needed to scale up to lifelong learning agents. The ASAP framework is also able to solve related new tasks simply by adapting where it applies its existing learned skills. We prove that ASAP converges to a local optimum under natural conditions. 
Finally, our experimental results, which include a RoboCup domain, demonstrate the ability of ASAP to learn where to reuse skills as well as solve multiple tasks with considerably less experience than solving each task from scratch.", "bibtex": "@inproceedings{NIPS2016_98d6f58a,\n author = {Mankowitz, Daniel J and Mann, Timothy A and Mannor, Shie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Skills Adaptive Partitions (ASAP)},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/98d6f58ab0dafbb86b083a001561bb34-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/98d6f58ab0dafbb86b083a001561bb34-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/98d6f58ab0dafbb86b083a001561bb34-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/98d6f58ab0dafbb86b083a001561bb34-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/98d6f58ab0dafbb86b083a001561bb34-Reviews.html", "metareview": "", "pdf_size": 1136887, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16166698196712198046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "The Technion - Israel Institute of Technology; The Technion - Israel Institute of Technology + Google Deepmind; The Technion - Israel Institute of Technology", "aff_domain": "tx.technion.ac.il;acm.org;ee.technion.ac.il", "email": "tx.technion.ac.il;acm.org;ee.technion.ac.il", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/98d6f58ab0dafbb86b083a001561bb34-Abstract.html", "aff_unique_index": "0;0+1;0", "aff_unique_norm": "Israel Institute of Technology;DeepMind", "aff_unique_dep": ";DeepMind", "aff_unique_url": "https://www.technion.ac.il/en/;https://deepmind.com", "aff_unique_abbr": "Technion;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;0", "aff_country_unique": "Israel;United Kingdom" }, { "title": "Adaptive Smoothed Online Multi-Task Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6944", "id": "6944", "author_site": "Keerthiram Murugesan, Hanxiao Liu, Jaime Carbonell, Yiming Yang", "author": "Keerthiram Murugesan; Hanxiao Liu; Jaime Carbonell; Yiming Yang", "abstract": "This paper addresses the challenge of jointly learning both the per-task model parameters and the inter-task relationships in a multi-task online learning setting. The proposed algorithm features a probabilistic interpretation, efficient updating rules, and flexible modulation of whether learners focus on their specific task or jointly address all tasks. The paper also proves a sub-linear regret bound as compared to the best linear predictor in hindsight. Experiments over three multi-task learning benchmark datasets show advantageous performance of the proposed approach over several state-of-the-art online multi-task learning baselines.", "bibtex": "@inproceedings{NIPS2016_a869ccbc,\n author = {Murugesan, Keerthiram and Liu, Hanxiao and Carbonell, Jaime and Yang, Yiming},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive Smoothed Online Multi-Task Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a869ccbcbd9568808b8497e28275c7c8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a869ccbcbd9568808b8497e28275c7c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a869ccbcbd9568808b8497e28275c7c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a869ccbcbd9568808b8497e28275c7c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a869ccbcbd9568808b8497e28275c7c8-Reviews.html", "metareview": "", "pdf_size": 432339, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11434297261770086687&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a869ccbcbd9568808b8497e28275c7c8-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Adaptive optimal training of animal behavior", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6888", "id": "6888", "author_site": "Ji Hyun Bak, Jung Choi, Ilana Witten, Athena Akrami, Jonathan Pillow", "author": "Ji Hyun Bak; Jung Yoon Choi; Athena Akrami; Ilana Witten; Jonathan W Pillow", "abstract": "Neuroscience experiments often require training animals to perform tasks designed to elicit various sensory, cognitive, and motor behaviors. Training typically involves a series of gradual adjustments of stimulus conditions and rewards in order to bring about learning. However, training protocols are usually hand-designed, relying on a combination of intuition, guesswork, and trial-and-error, and often require weeks or months to achieve a desired level of task performance. Here we combine ideas from reinforcement learning and adaptive optimal experimental design to formulate methods for adaptive optimal training of animal behavior. Our work addresses two intriguing problems at once: first, it seeks to infer the learning rules underlying an animal's behavioral changes during training; second, it seeks to exploit these rules to select stimuli that will maximize the rate of learning toward a desired objective. We develop and test these methods using data collected from rats during training on a two-interval sensory discrimination task. We show that we can accurately infer the parameters of a policy-gradient-based learning algorithm that describes how the animal's internal model of the task evolves over the course of training. We then formulate a theory for optimal training, which involves selecting sequences of stimuli that will drive the animal's internal policy toward a desired location in the parameter space. Simulations show that our method can in theory provide a substantial speedup over standard training methods. 
We believe these results will have considerable theoretical and practical implications both for researchers in reinforcement learning and for experimentalists seeking to train animals.", "bibtex": "@inproceedings{NIPS2016_7fec306d,\n author = {Bak, Ji Hyun and Choi, Jung Yoon and Akrami, Athena and Witten, Ilana and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adaptive optimal training of animal behavior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7fec306d1e665bc9c748b5d2b99a6e97-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7fec306d1e665bc9c748b5d2b99a6e97-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7fec306d1e665bc9c748b5d2b99a6e97-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7fec306d1e665bc9c748b5d2b99a6e97-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7fec306d1e665bc9c748b5d2b99a6e97-Reviews.html", "metareview": "", "pdf_size": 1950429, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=352263505322395281&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "Department of Physics, Princeton University + School of Computational Sciences, Korea Institute for Advanced Study; Department of Psychology, Princeton University + Princeton Neuroscience Institute, Princeton University; Princeton Neuroscience Institute, Princeton University + Howard Hughes Medical Institute; Department of Psychology, Princeton University + Princeton Neuroscience Institute, Princeton University; Department of Psychology, Princeton University + Princeton Neuroscience Institute, Princeton University", "aff_domain": "kias.re.kr;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "email": "kias.re.kr;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7fec306d1e665bc9c748b5d2b99a6e97-Abstract.html", "aff_unique_index": "0+1;0+0;0+2;0+0;0+0", "aff_unique_norm": "Princeton University;Korea Institute for Advanced Study;Howard Hughes Medical Institute", "aff_unique_dep": "Department of Physics;School of Computational Sciences;", "aff_unique_url": "https://www.princeton.edu;https://www.kaist.edu;https://www.hhmi.org", "aff_unique_abbr": "Princeton;KIAS;HHMI", "aff_campus_unique_index": ";1;1;1;1", "aff_campus_unique": ";Princeton", "aff_country_unique_index": "0+1;0+0;0+0;0+0;0+0", "aff_country_unique": "United States;South Korea" }, { "title": "Adversarial Multiclass Classification: A Risk Minimization Perspective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7206", "id": "7206", "author_site": "Rizal Fathony, Anqi Liu, Kaiser Asif, Brian Ziebart", "author": "Rizal Fathony; Anqi Liu; Kaiser Asif; Brian Ziebart", "abstract": "Recently proposed adversarial classification methods have shown promising results for cost-sensitive and multivariate losses. In contrast with empirical risk minimization (ERM) methods, which use convex surrogate losses to approximate the desired non-convex target loss function, adversarial methods minimize non-convex losses by treating the properties of the training data as being uncertain and worst case within a minimax game. 
Despite this difference in formulation, we recast adversarial classification under zero-one loss as an ERM method with a novel prescribed loss function. We demonstrate a number of theoretical and practical advantages over the very closely related hinge loss ERM methods. This establishes adversarial classification under the zero-one loss as a method that fills the long-standing gap in multiclass hinge loss classification, simultaneously guaranteeing Fisher consistency and universal consistency, while also providing dual parameter sparsity and high accuracy predictions in practice.", "bibtex": "@inproceedings{NIPS2016_ad13a2a0,\n author = {Fathony, Rizal and Liu, Anqi and Asif, Kaiser and Ziebart, Brian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Adversarial Multiclass Classification: A Risk Minimization Perspective},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ad13a2a07ca4b7642959dc0c4c740ab6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ad13a2a07ca4b7642959dc0c4c740ab6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ad13a2a07ca4b7642959dc0c4c740ab6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ad13a2a07ca4b7642959dc0c4c740ab6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ad13a2a07ca4b7642959dc0c4c740ab6-Reviews.html", "metareview": "", "pdf_size": 2864348, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6008741397717191791&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Illinois at Chicago; Department of Computer Science, University of Illinois at Chicago; Department of Computer Science, University of Illinois at Chicago; Department of Computer Science, University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu;uic.edu;uic.edu", "email": "uic.edu;uic.edu;uic.edu;uic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ad13a2a07ca4b7642959dc0c4c740ab6-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Agnostic Estimation for Misspecified Phase Retrieval Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7102", "id": "7102", "author_site": "Matey Neykov, Zhaoran Wang, Han Liu", "author": "Matey Neykov; Zhaoran Wang; Han Liu", "abstract": "The goal of noisy high-dimensional phase retrieval is to estimate an $s$-sparse parameter $\boldsymbol{\beta}^*\in \mathbb{R}^d$ from $n$ realizations of the model $Y = (\boldsymbol{X}^{\top} \boldsymbol{\beta}^*)^2 + \varepsilon$. Based on this model, we propose a significant semi-parametric generalization called misspecified phase retrieval (MPR), in which $Y = f(\boldsymbol{X}^{\top}\boldsymbol{\beta}^*, \varepsilon)$ with unknown $f$ and $\operatorname{Cov}(Y, (\boldsymbol{X}^{\top}\boldsymbol{\beta}^*)^2) > 0$. 
For example, MPR encompasses $Y = h(|\boldsymbol{X}^{\top} \boldsymbol{\beta}^*|) + \varepsilon$ with increasing $h$ as a special case. Despite the generality of the MPR model, it eludes the reach of most existing semi-parametric estimators. In this paper, we propose an estimation procedure, which consists of solving a cascade of two convex programs and provably recovers the direction of $\boldsymbol{\beta}^*$. Our theory is backed up by thorough numerical results.", "bibtex": "@inproceedings{NIPS2016_f48c04ff,\n author = {Neykov, Matey and Wang, Zhaoran and Liu, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Agnostic Estimation for Misspecified Phase Retrieval Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f48c04ffab49ff0e5d1176244fdfb65c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f48c04ffab49ff0e5d1176244fdfb65c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f48c04ffab49ff0e5d1176244fdfb65c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f48c04ffab49ff0e5d1176244fdfb65c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f48c04ffab49ff0e5d1176244fdfb65c-Reviews.html", "metareview": "", "pdf_size": 1442192, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7433044864311448062&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Operations Research and Financial Engineering, Princeton University, Princeton, NJ 08544; Department of Operations Research and Financial Engineering, Princeton University, Princeton, NJ 08544; Department of Operations Research and Financial Engineering, Princeton University, Princeton, NJ 08544", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f48c04ffab49ff0e5d1176244fdfb65c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Operations Research and Financial Engineering", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Princeton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Algorithms and matching lower bounds for approximately-convex optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6919", "id": "6919", "author_site": "Andrej Risteski, Yuanzhi Li", "author": "Andrej Risteski; Yuanzhi Li", "abstract": "In recent years, a rapidly increasing number of applications in practice require solving non-convex objectives, such as training neural networks, learning graphical models, and maximum likelihood estimation. Though simple heuristics such as gradient descent with very few modifications tend to work well, theoretical understanding is very weak. We consider possibly the most natural class of non-convex functions where one could hope to obtain provable guarantees: functions that are ``approximately convex'', i.e. 
functions $\\tf: \\Real^d \\to \\Real$ for which there exists a \\emph{convex function} $f$ such that for all $x$, $|\\tf(x) - f(x)| \\le \\errnoise$ for a fixed value $\\errnoise$. We then want to minimize $\\tf$, i.e. output a point $\\tx$ such that $\\tf(\\tx) \\le \\min_{x} \\tf(x) + \\err$. It is quite natural to conjecture that for fixed $\\err$, the problem gets harder for larger $\\errnoise$, however, the exact dependency of $\\err$ and $\\errnoise$ is not known. In this paper, we strengthen the known \\emph{information theoretic} lower bounds on the trade-off between $\\err$ and $\\errnoise$ substantially, and exhibit an algorithm that matches these lower bounds for a large class of convex bodies.", "bibtex": "@inproceedings{NIPS2016_186fb23a,\n author = {Risteski, Andrej and Li, Yuanzhi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Algorithms and matching lower bounds for approximately-convex optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/186fb23a33995d91ce3c2212189178c8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/186fb23a33995d91ce3c2212189178c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/186fb23a33995d91ce3c2212189178c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/186fb23a33995d91ce3c2212189178c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/186fb23a33995d91ce3c2212189178c8-Reviews.html", "metareview": "", "pdf_size": 304206, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2738010132048318980&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science, Princeton University; Department of Computer Science, Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu", "email": "cs.princeton.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/186fb23a33995d91ce3c2212189178c8-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An Architecture for Deep, Hierarchical Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8503", "id": "8503", "author": "Philip Bachman", "abstract": "We present an architecture which lets us train deep, directed generative models with many layers of latent variables. We include deterministic paths between all latent variables and the generated output, and provide a richer set of connections between computations for inference and generation, which enables more effective communication of information throughout the model during training. To improve performance on natural images, we incorporate a lightweight autoregressive model in the reconstruction distribution. These techniques permit end-to-end training of models with 10+ layers of latent variables. 
Experiments show that our approach achieves state-of-the-art performance on standard image modelling benchmarks, can expose latent class structure in the absence of label information, and can provide convincing imputations of occluded regions in natural images.", "bibtex": "@inproceedings{NIPS2016_49d4b2fa,\n author = {Bachman, Philip},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Architecture for Deep, Hierarchical Generative Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/49d4b2faeb4b7b9e745775793141e2b2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/49d4b2faeb4b7b9e745775793141e2b2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/49d4b2faeb4b7b9e745775793141e2b2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/49d4b2faeb4b7b9e745775793141e2b2-Reviews.html", "metareview": "", "pdf_size": 2484792, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13573375793737251431&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Maluuba Research", "aff_domain": "maluuba.com", "email": "maluuba.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/49d4b2faeb4b7b9e745775793141e2b2-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Maluuba", "aff_unique_dep": "Research", "aff_unique_url": "https://maluuba.com", "aff_unique_abbr": "Maluuba", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "title": "An Efficient Streaming Algorithm for the Submodular Cover Problem", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7280", "id": "7280", "author_site": "Ashkan Norouzi-Fard, Abbas Bazzi, Ilija Bogunovic, Marwa El Halabi, Ya-Ping Hsieh, Volkan Cevher", "author": "Ashkan Norouzi-Fard; Abbas Bazzi; Ilija Bogunovic; Marwa El Halabi; Ya-Ping Hsieh; Volkan Cevher", "abstract": "We initiate the study of the classical Submodular Cover (SC) problem in the data streaming model which we refer to as the Streaming Submodular Cover (SSC). We show that any single pass streaming algorithm using sublinear memory in the size of the stream will fail to provide any non-trivial approximation guarantees for SSC. Hence, we consider a relaxed version of SSC, where we only seek to find a partial cover. We design the first Efficient bicriteria Submodular Cover Streaming (ESC-Streaming) algorithm for this problem, and provide theoretical guarantees for its performance supported by numerical evidence. Our algorithm finds solutions that are competitive with the near-optimal offline greedy algorithm despite requiring only a single pass over the data stream. In our numerical experiments, we evaluate the performance of ESC-Streaming on active set selection and large-scale graph cover problems.", "bibtex": "@inproceedings{NIPS2016_329e6581,\n author = {Norouzi-Fard, Ashkan and Bazzi, Abbas and Bogunovic, Ilija and El Halabi, Marwa and Hsieh, Ya-Ping and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Efficient Streaming Algorithm for the Submodular Cover Problem},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/329e6581efbc90bd92a1f22c4ba2103d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/329e6581efbc90bd92a1f22c4ba2103d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/329e6581efbc90bd92a1f22c4ba2103d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/329e6581efbc90bd92a1f22c4ba2103d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/329e6581efbc90bd92a1f22c4ba2103d-Reviews.html", "metareview": "", "pdf_size": 526589, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6513681279642761483&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Theory of Computation Laboratory 2 (THL2), EPFL; Theory of Computation Laboratory 2 (THL2), EPFL; Laboratory for Information and Inference Systems (LIONS), EPFL; Laboratory for Information and Inference Systems (LIONS), EPFL; Laboratory for Information and Inference Systems (LIONS), EPFL; Laboratory for Information and Inference Systems (LIONS), EPFL", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/329e6581efbc90bd92a1f22c4ba2103d-Abstract.html", "aff_unique_index": "0;0;1;1;1;1", "aff_unique_norm": "Ecole Polytechnique Federale de Lausanne;EPFL", "aff_unique_dep": "Theory of Computation Laboratory 2 (THL2);Laboratory for Information and Inference Systems (LIONS)", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "An Online Sequence-to-Sequence Model Using Partial Conditioning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7086", "id": "7086", "author_site": "Navdeep Jaitly, Quoc V Le, Oriol Vinyals, Ilya Sutskever, David Sussillo, Samy Bengio", "author": "Navdeep Jaitly; Quoc V Le; Oriol Vinyals; Ilya Sutskever; David Sussillo; Samy Bengio", "abstract": "Sequence-to-sequence models have achieved impressive results on various tasks. However, they are unsuitable for tasks that require incremental predictions to be made as more data arrives or tasks that have long input sequences and output sequences. This is because they generate an output sequence conditioned on an entire input sequence. In this paper, we present a Neural Transducer that can make incremental predictions as more input arrives, without redoing the entire computation. Unlike sequence-to-sequence models, the Neural Transducer computes the next-step distribution conditioned on the partially observed input sequence and the partially generated sequence. At each time step, the transducer can decide to emit zero to many output symbols. The data can be processed using an encoder and presented as input to the transducer. The discrete decision to emit a symbol at every time step makes it difficult to learn with conventional backpropagation. It is however possible to train the transducer by using a dynamic programming algorithm to generate target discrete decisions. 
Our experiments show that the Neural Transducer works well in settings where it is required to produce output predictions as data come in. We also find that the Neural Transducer performs well for long sequences even when attention mechanisms are not used.", "bibtex": "@inproceedings{NIPS2016_312351bf,\n author = {Jaitly, Navdeep and Le, Quoc V and Vinyals, Oriol and Sutskever, Ilya and Sussillo, David and Bengio, Samy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An Online Sequence-to-Sequence Model Using Partial Conditioning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/312351bff07989769097660a56395065-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/312351bff07989769097660a56395065-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/312351bff07989769097660a56395065-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/312351bff07989769097660a56395065-Reviews.html", "metareview": "", "pdf_size": 643117, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5086710091749818625&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Google Brain; Google Brain; Google Brain; Google DeepMind; Open AI*; Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;openai.com;google.com", "email": "google.com;google.com;google.com;google.com;openai.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/312351bff07989769097660a56395065-Abstract.html", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Google;OpenAI", "aff_unique_dep": "Google Brain;", "aff_unique_url": "https://brain.google.com;https://openai.com", "aff_unique_abbr": "Google Brain;OpenAI", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "An algorithm for L1 nearest neighbor search via monotonic embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7137", "id": "7137", "author_site": "Xinan Wang, Sanjoy Dasgupta", "author": "Xinan Wang; Sanjoy Dasgupta", "abstract": "Fast algorithms for nearest neighbor (NN) search have in large part focused on L2 distance. Here we develop an approach for L1 distance that begins with an explicit and exact embedding of the points into L2. We show how this embedding can efficiently be combined with random projection methods for L2 NN search, such as locality-sensitive hashing or random projection trees. We rigorously establish the correctness of the methodology and show by experimentation that it is competitive in practice with available alternatives.", "bibtex": "@inproceedings{NIPS2016_46922a08,\n author = {Wang, Xinan and Dasgupta, Sanjoy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An algorithm for L1 nearest neighbor search via monotonic embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/46922a0880a8f11f8f69cbb52b1396be-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/46922a0880a8f11f8f69cbb52b1396be-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/46922a0880a8f11f8f69cbb52b1396be-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/46922a0880a8f11f8f69cbb52b1396be-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/46922a0880a8f11f8f69cbb52b1396be-Reviews.html", "metareview": "", "pdf_size": 133733, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9104932593514790209&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "UC San Diego; UC San Diego", "aff_domain": "ucsd.edu;cs.ucsd.edu", "email": "ucsd.edu;cs.ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/46922a0880a8f11f8f69cbb52b1396be-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An ensemble diversity approach to supervised binary hashing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7282", "id": "7282", "author_site": "Miguel A. Carreira-Perpinan, Ramin Raziperchikolaei", "author": "Miguel A. Carreira-Perpinan; Ramin Raziperchikolaei", "abstract": "Binary hashing is a well-known approach for fast approximate nearest-neighbor search in information retrieval. Much work has focused on affinity-based objective functions involving the hash functions or binary codes. These objective functions encode neighborhood information between data points and are often inspired by manifold learning algorithms. They ensure that the hash functions differ from each other through constraints or penalty terms that encourage codes to be orthogonal or dissimilar across bits, but this couples the binary variables and complicates the already difficult optimization. We propose a much simpler approach: we train each hash function (or bit) independently from each other, but introduce diversity among them using techniques from classifier ensembles. Surprisingly, we find that not only is this faster and trivially parallelizable, but it also improves over the more complex, coupled objective function, and achieves state-of-the-art precision and recall in experiments with image retrieval.", "bibtex": "@inproceedings{NIPS2016_67f7fb87,\n author = {Carreira-Perpinan, Miguel A. and Raziperchikolaei, Ramin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An ensemble diversity approach to supervised binary hashing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/67f7fb873eaf29526a11a9b7ac33bfac-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/67f7fb873eaf29526a11a9b7ac33bfac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/67f7fb873eaf29526a11a9b7ac33bfac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/67f7fb873eaf29526a11a9b7ac33bfac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/67f7fb873eaf29526a11a9b7ac33bfac-Reviews.html", "metareview": "", "pdf_size": 301216, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6773664200299194649&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "EECS, University of California, Merced; EECS, University of California, Merced", "aff_domain": "ucmerced.edu;ucmerced.edu", "email": "ucmerced.edu;ucmerced.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/67f7fb873eaf29526a11a9b7ac33bfac-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Merced", "aff_unique_dep": "EECS", "aff_unique_url": "https://www.ucmerced.edu", "aff_unique_abbr": "UC Merced", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An equivalence between high dimensional Bayes optimal inference and M-estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7203", "id": "7203", "author_site": "Madhu Advani, Surya Ganguli", "author": "Madhu Advani; Surya Ganguli", "abstract": "Due to the computational difficulty of performing MMSE (minimum mean squared error) inference, maximum a posteriori (MAP) is often used as a surrogate. However, the accuracy of MAP is suboptimal for high dimensional inference, where the number of model parameters is of the same order as the number of samples. In this work we demonstrate how MMSE performance is asymptotically achievable via optimization with an appropriately selected convex penalty and regularization function which are a smoothed version of the widely applied MAP algorithm. Our findings provide a new derivation and interpretation for recent optimal M-estimators discovered by El Karoui et al., PNAS 2013, as well as an extension to non-additive noise models. We demonstrate the performance of these optimal M-estimators with numerical simulations. Overall, at the heart of our work is the revelation of a remarkable equivalence between two seemingly very different computational problems: namely that of high dimensional Bayesian integration, and high dimensional convex optimization. In essence we show that the former computationally difficult integral may be computed by solving the latter, simpler optimization problem.", "bibtex": "@inproceedings{NIPS2016_08e6bea8,\n author = {Advani, Madhu and Ganguli, Surya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An equivalence between high dimensional Bayes optimal inference and M-estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/08e6bea8e90ba87af3c9554d94db6579-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/08e6bea8e90ba87af3c9554d94db6579-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/08e6bea8e90ba87af3c9554d94db6579-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/08e6bea8e90ba87af3c9554d94db6579-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/08e6bea8e90ba87af3c9554d94db6579-Reviews.html", "metareview": "", "pdf_size": 335731, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=749075307059750203&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Applied Physics, Stanford University; Department of Applied Physics, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/08e6bea8e90ba87af3c9554d94db6579-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Applied Physics", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "An urn model for majority voting in classification ensembles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7033", "id": "7033", "author_site": "Victor Soto, Alberto Su\u00e1rez, Gonzalo Martinez-Mu\u00f1oz", "author": "Victor Soto; Alberto Su\u00e1rez; Gonzalo Martinez-Mu\u00f1oz", "abstract": "In this work we analyze the class prediction of parallel randomized ensembles by majority voting as an urn model. For a given test instance, the ensemble can be viewed as an urn of marbles of different colors. A marble represents an individual classifier. Its color represents the class label prediction of the corresponding classifier. The sequential querying of classifiers in the ensemble can be seen as draws without replacement from the urn. An analysis of this classical urn model based on the hypergeometric distribution makes it possible to estimate the confidence on the outcome of majority voting when only a fraction of the individual predictions is known. These estimates can be used to speed up the prediction by the ensemble. Specifically, the aggregation of votes can be halted when the confidence in the final prediction is sufficiently high. If one assumes a uniform prior for the distribution of possible votes, the analysis is shown to be equivalent to a previous one based on Dirichlet distributions. The advantage of the current approach is that prior knowledge on the possible vote outcomes can be readily incorporated in a Bayesian framework. 
We show how incorporating this type of problem-specific knowledge into the statistical analysis of majority voting leads to faster classification by the ensemble and allows us to estimate the expected average speed-up beforehand.", "bibtex": "@inproceedings{NIPS2016_d1a21da7,\n author = {Soto, Victor and Su\\'{a}rez, Alberto and Martinez-Mu\\~{n}oz, Gonzalo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {An urn model for majority voting in classification ensembles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d1a21da7bca4abff8b0b61b87597de73-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d1a21da7bca4abff8b0b61b87597de73-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d1a21da7bca4abff8b0b61b87597de73-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d1a21da7bca4abff8b0b61b87597de73-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d1a21da7bca4abff8b0b61b87597de73-Reviews.html", "metareview": "", "pdf_size": 299749, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12568846377057565338&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Columbia University, New York, NY, USA; Computer Science Department, Universidad Aut\u00f3noma de Madrid, Madrid, Spain; Computer Science Department, Universidad Aut\u00f3noma de Madrid, Madrid, Spain", "aff_domain": "cs.columbia.edu;uam.es;uam.es", "email": "cs.columbia.edu;uam.es;uam.es", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d1a21da7bca4abff8b0b61b87597de73-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Columbia University;Universidad Aut\u00f3noma de Madrid", "aff_unique_dep": "Computer Science Department;Computer Science Department", "aff_unique_url": "https://www.columbia.edu;https://www.uam.es", "aff_unique_abbr": "Columbia;UAM", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "New York;Madrid", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Spain" }, { "title": "Ancestral Causal Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7114", "id": "7114", "author_site": "Sara Magliacane, Tom Claassen, Joris Mooij", "author": "Sara Magliacane; Tom Claassen; Joris M. Mooij", "abstract": "Constraint-based causal discovery from limited data is a notoriously difficult challenge due to the many borderline independence test decisions. Several approaches to improve the reliability of the predictions by exploiting redundancy in the independence information have been proposed recently. Though promising, existing approaches can still be greatly improved in terms of accuracy and scalability. We present a novel method that reduces the combinatorial explosion of the search space by using a more coarse-grained representation of causal information, drastically reducing computation time. Additionally, we propose a method to score causal predictions based on their confidence. Crucially, our implementation also allows one to easily combine observational and interventional data and to incorporate various types of available background knowledge. 
We prove soundness and asymptotic consistency of our method and demonstrate that it can outperform the state-of-the-art on synthetic data, achieving a speedup of several orders of magnitude. We illustrate its practical feasibility by applying it to a challenging protein data set.", "bibtex": "@inproceedings{NIPS2016_f3d9de86,\n author = {Magliacane, Sara and Claassen, Tom and Mooij, Joris M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Ancestral Causal Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f3d9de86462c28781cbe5c47ef22c3e5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f3d9de86462c28781cbe5c47ef22c3e5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f3d9de86462c28781cbe5c47ef22c3e5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f3d9de86462c28781cbe5c47ef22c3e5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f3d9de86462c28781cbe5c47ef22c3e5-Reviews.html", "metareview": "", "pdf_size": 4421855, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10341678159221695398&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 20, "aff": "VU Amsterdam + University of Amsterdam; Radboud University Nijmegen; University of Amsterdam", "aff_domain": "gmail.com;cs.ru.nl;uva.nl", "email": "gmail.com;cs.ru.nl;uva.nl", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f3d9de86462c28781cbe5c47ef22c3e5-Abstract.html", "aff_unique_index": "0+1;2;1", "aff_unique_norm": "Vrije Universiteit Amsterdam;University of Amsterdam;Radboud University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vu.nl;https://www.uva.nl;https://www.ru.nl/", "aff_unique_abbr": "VU;UvA;RU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Amsterdam;;Nijmegen", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Anchor-Free Correlated Topic Modeling: Identifiability and Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7067", "id": "7067", "author_site": "Kejun Huang, Xiao Fu, Nikolaos D. Sidiropoulos", "author": "Kejun Huang; Xiao Fu; Nikolaos D. Sidiropoulos", "abstract": "In topic modeling, many algorithms that guarantee identifiability of the topics have been developed under the premise that there exist anchor words -- i.e., words that only appear (with positive probability) in one topic. Follow-up work has resorted to three or higher-order statistics of the data corpus to relax the anchor word assumption. Reliable estimates of higher-order statistics are hard to obtain, however, and the identification of topics under those models hinges on uncorrelatedness of the topics, which can be unrealistic. This paper revisits topic modeling based on second-order moments, and proposes an anchor-free topic mining framework. The proposed approach guarantees the identification of the topics under a much milder condition compared to the anchor-word assumption, thereby exhibiting much better robustness in practice. The associated algorithm only involves one eigen-decomposition and a few small linear programs. This makes it easy to implement and scale up to very large problem instances. 
Experiments using the TDT2 and Reuters-21578 corpora demonstrate that the proposed anchor-free approach exhibits very favorable performance (measured using coherence, similarity count, and clustering accuracy metrics) compared to the prior art.", "bibtex": "@inproceedings{NIPS2016_d707329b,\n author = {Huang, Kejun and Fu, Xiao and Sidiropoulos, Nikolaos D.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Anchor-Free Correlated Topic Modeling: Identifiability and Algorithm},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d707329bece455a462b58ce00d1194c9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d707329bece455a462b58ce00d1194c9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d707329bece455a462b58ce00d1194c9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d707329bece455a462b58ce00d1194c9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d707329bece455a462b58ce00d1194c9-Reviews.html", "metareview": "", "pdf_size": 511445, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9718090337239613497&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of Electrical and Computer Engineering, University of Minnesota; Department of Electrical and Computer Engineering, University of Minnesota; Department of Electrical and Computer Engineering, University of Minnesota", "aff_domain": "umn.edu;umn.edu;ece.umn.edu", "email": "umn.edu;umn.edu;ece.umn.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d707329bece455a462b58ce00d1194c9-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.umn.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Approximate maximum entropy principles via Goemans-Williamson with applications to provable variational methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7300", "id": "7300", "author_site": "Andrej Risteski, Yuanzhi Li", "author": "Andrej Risteski; Yuanzhi Li", "abstract": "The well-known maximum-entropy principle due to Jaynes, which states that, given mean parameters, the maximum entropy distribution matching them is in an exponential family, has been very popular in machine learning due to its \u201cOccam\u2019s razor\u201d interpretation. Unfortunately, calculating the potentials in the maximum entropy distribution is intractable [BGS14]. We provide computationally efficient versions of this principle when the mean parameters are pairwise moments: we design distributions that approximately match given pairwise moments, while having entropy which is comparable to the maximum entropy distribution matching those moments. We additionally provide surprising applications of the approximate maximum entropy principle to designing provable variational methods for partition function calculations for Ising models without any assumptions on the potentials of the model. 
More precisely, we show that we can get approximation guarantees for the log-partition function comparable to those in the low-temperature limit, which is the setting of optimization of quadratic forms over the hypercube ([AN06]).", "bibtex": "@inproceedings{NIPS2016_046ddf96,\n author = {Risteski, Andrej and Li, Yuanzhi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Approximate maximum entropy principles via Goemans-Williamson with applications to provable variational methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/046ddf96c233a273fd390c3d0b1a9aa4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/046ddf96c233a273fd390c3d0b1a9aa4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/046ddf96c233a273fd390c3d0b1a9aa4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/046ddf96c233a273fd390c3d0b1a9aa4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/046ddf96c233a273fd390c3d0b1a9aa4-Reviews.html", "metareview": "", "pdf_size": 345113, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1913773942539997971&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff": "Department of Computer Science, Princeton University; Department of Computer Science, Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu", "email": "cs.princeton.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/046ddf96c233a273fd390c3d0b1a9aa4-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Architectural Complexity Measures of Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7303", "id": "7303", "author_site": "Saizheng Zhang, Yuhuai Wu, Tong Che, Zhouhan Lin, Roland Memisevic, Russ Salakhutdinov, Yoshua Bengio", "author": "Saizheng Zhang; Yuhuai Wu; Tong Che; Zhouhan Lin; Roland Memisevic; Ruslan Salakhutdinov; Yoshua Bengio", "abstract": "In this paper, we systematically analyze the connecting architectures of recurrent neural networks (RNNs). Our main contribution is twofold: First, we present a rigorous graph-theoretic framework describing the connecting architectures of RNNs in general. Second, we propose three architecture complexity measures of RNNs: (a) the recurrent depth, which captures the RNN\u2019s over-time nonlinear complexity, (b) the feedforward depth, which captures the local input-output nonlinearity (similar to the \u201cdepth\u201d in feedforward neural networks (FNNs)), and (c) the recurrent skip coefficient, which captures how rapidly the information propagates over time. We rigorously prove each measure\u2019s existence and computability. Our experimental results show that RNNs might benefit from larger recurrent depth and feedforward depth. 
We further demonstrate that increasing the recurrent skip coefficient offers performance boosts on long-term dependency problems.", "bibtex": "@inproceedings{NIPS2016_860320be,\n author = {Zhang, Saizheng and Wu, Yuhuai and Che, Tong and Lin, Zhouhan and Memisevic, Roland and Salakhutdinov, Russ R and Bengio, Yoshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Architectural Complexity Measures of Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/860320be12a1c050cd7731794e231bd3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/860320be12a1c050cd7731794e231bd3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/860320be12a1c050cd7731794e231bd3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/860320be12a1c050cd7731794e231bd3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/860320be12a1c050cd7731794e231bd3-Reviews.html", "metareview": "", "pdf_size": 410594, "gs_citation": 206, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9430461092837132372&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/860320be12a1c050cd7731794e231bd3-Abstract.html" }, { "title": "Assortment Optimization Under the Mallows model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7349", "id": "7349", "author_site": "Antoine Desir, Vineet Goyal, Srikanth Jagabathula, Danny Segev", "author": "Antoine Desir; Vineet Goyal; Srikanth Jagabathula; Danny Segev", "abstract": "We consider the assortment optimization problem when customer preferences follow a mixture of Mallows distributions. The assortment optimization problem focuses on determining the revenue/profit maximizing subset of products from a large universe of products; it is an important decision that is commonly faced by retailers in determining what to offer their customers. There are two key challenges: (a) the Mallows distribution lacks a closed-form expression (and requires summing an exponential number of terms) to compute the choice probability and, hence, the expected revenue/profit per customer; and (b) finding the best subset may require an exhaustive search. Our key contributions are an efficiently computable closed-form expression for the choice probability under the Mallows model and a compact mixed integer linear program (MIP) formulation for the assortment problem.", "bibtex": "@inproceedings{NIPS2016_466accba,\n author = {Desir, Antoine and Goyal, Vineet and Jagabathula, Srikanth and Segev, Danny},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Assortment Optimization Under the Mallows model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/466accbac9a66b805ba50e42ad715740-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/466accbac9a66b805ba50e42ad715740-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/466accbac9a66b805ba50e42ad715740-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/466accbac9a66b805ba50e42ad715740-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/466accbac9a66b805ba50e42ad715740-Reviews.html", "metareview": "", "pdf_size": 405437, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15409062514807038338&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "IEOR Department, Columbia University; IEOR Department, Columbia University; IOMS Department, NYU Stern School of Business; Department of Statistics, University of Haifa", "aff_domain": "ieor.columbia.edu;ieor.columbia.edu;stern.nyu.edu;stat.haifa.ac.il", "email": "ieor.columbia.edu;ieor.columbia.edu;stern.nyu.edu;stat.haifa.ac.il", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/466accbac9a66b805ba50e42ad715740-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Columbia University;New York University Stern School of Business;University of Haifa", "aff_unique_dep": "IEOR Department;IOMS Department;Department of Statistics", "aff_unique_url": "https://www.columbia.edu;https://www.stern.nyu.edu;https://www.haifa.ac.il", "aff_unique_abbr": "Columbia;NYU Stern;UoH", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Israel" }, { "title": "Asynchronous Parallel Greedy Coordinate Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7293", "id": "7293", "author_site": "Yang You, Xiangru Lian, Ji Liu, Hsiang-Fu Yu, Inderjit Dhillon, James Demmel, Cho-Jui Hsieh", "author": "Yang You; Xiangru Lian; Ji Liu; Hsiang-Fu Yu; Inderjit S Dhillon; James Demmel; Cho-Jui Hsieh", "abstract": "In this paper, we propose and study an Asynchronous parallel Greedy Coordinate Descent (Asy-GCD) algorithm for minimizing a smooth function with bounded constraints. At each iteration, workers asynchronously conduct greedy coordinate descent updates on a block of variables. In the first part of the paper, we analyze the theoretical behavior of Asy-GCD and prove a linear convergence rate. In the second part, we develop an efficient kernel SVM solver based on Asy-GCD in the shared memory multi-core setting. Since our algorithm is fully asynchronous---each core does not need to idle and wait for the other cores---the resulting algorithm enjoys good speedup and outperforms existing multi-core kernel SVM solvers including asynchronous stochastic coordinate descent and multi-core LIBSVM.", "bibtex": "@inproceedings{NIPS2016_43e4e6a6,\n author = {You, Yang and Lian, Xiangru and Liu, Ji and Yu, Hsiang-Fu and Dhillon, Inderjit S and Demmel, James and Hsieh, Cho-Jui},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Asynchronous Parallel Greedy Coordinate Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/43e4e6a6f341e00671e123714de019a8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/43e4e6a6f341e00671e123714de019a8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/43e4e6a6f341e00671e123714de019a8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/43e4e6a6f341e00671e123714de019a8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/43e4e6a6f341e00671e123714de019a8-Reviews.html", "metareview": "", "pdf_size": 1027696, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8150338743266299291&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of California, Berkeley; University of Rochester; University of Rochester; University of Texas, Austin; University of Texas, Austin; University of California, Berkeley; University of California, Davis", "aff_domain": "cs.berkeley.edu;yandex.com;cs.rochester.edu;cs.utexas.edu;cs.utexas.edu;eecs.berkeley.edu;cs.ucdavis.edu", "email": "cs.berkeley.edu;yandex.com;cs.rochester.edu;cs.utexas.edu;cs.utexas.edu;eecs.berkeley.edu;cs.ucdavis.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/43e4e6a6f341e00671e123714de019a8-Abstract.html", "aff_unique_index": "0;1;1;2;2;0;3", "aff_unique_norm": "University of California, Berkeley;University of Rochester;University of Texas at Austin;University of California, Davis", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.berkeley.edu;https://www.rochester.edu;https://www.utexas.edu;https://www.ucdavis.edu", "aff_unique_abbr": "UC Berkeley;U of R;UT Austin;UC Davis", "aff_campus_unique_index": "0;2;2;0;3", "aff_campus_unique": "Berkeley;;Austin;Davis", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Attend, Infer, Repeat: Fast Scene Understanding with Generative Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7281", "id": "7281", "author_site": "S. M. Ali Eslami, Nicolas Heess, Theophane Weber, Yuval Tassa, David Szepesvari, koray kavukcuoglu, Geoffrey E Hinton", "author": "S. M. Ali Eslami; Nicolas Heess; Theophane Weber; Yuval Tassa; David Szepesvari; koray kavukcuoglu; Geoffrey E. Hinton", "abstract": "We present a framework for efficient inference in structured image models that explicitly reason about objects. We achieve this by performing probabilistic inference using a recurrent neural network that attends to scene elements and processes them one at a time. Crucially, the model itself learns to choose the appropriate number of inference steps. We use this scheme to learn to perform inference in partially specified 2D models (variable-sized variational auto-encoders) and fully specified 3D models (probabilistic renderers). We show that such models learn to identify multiple objects - counting, locating and classifying the elements of a scene - without any supervision, e.g., decomposing 3D images with various numbers of objects in a single forward pass of a neural network at unprecedented speed. 
We further show that the networks produce accurate inferences when compared to supervised counterparts, and that their structure leads to improved generalization.", "bibtex": "@inproceedings{NIPS2016_52947e0a,\n author = {Eslami, S. M. Ali and Heess, Nicolas and Weber, Theophane and Tassa, Yuval and Szepesvari, David and kavukcuoglu, koray and Hinton, Geoffrey E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Attend, Infer, Repeat: Fast Scene Understanding with Generative Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/52947e0ade57a09e4a1386d08f17b656-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/52947e0ade57a09e4a1386d08f17b656-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/52947e0ade57a09e4a1386d08f17b656-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/52947e0ade57a09e4a1386d08f17b656-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/52947e0ade57a09e4a1386d08f17b656-Reviews.html", "metareview": "", "pdf_size": 1272119, "gs_citation": 634, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17841381849984749111&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": "Google DeepMind, London, UK; Google DeepMind, London, UK; Google DeepMind, London, UK; Google DeepMind, London, UK; Google DeepMind, London, UK; Google DeepMind, London, UK; Google DeepMind, London, UK", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/52947e0ade57a09e4a1386d08f17b656-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Automated scalable segmentation of neurons from multispectral images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7027", "id": "7027", "author_site": "Uygar S\u00fcmb\u00fcl, Douglas Roossien, Dawen Cai, Fei Chen, Nicholas Barry, John Cunningham, Edward Boyden, Liam Paninski", "author": "Uygar S\u00fcmb\u00fcl; Douglas Roossien; Dawen Cai; Fei Chen; Nicholas Barry; John P. Cunningham; Edward Boyden; Liam Paninski", "abstract": "Reconstruction of neuroanatomy is a fundamental problem in neuroscience. Stochastic expression of colors in individual cells is a promising tool, although its use in the nervous system has been limited due to various sources of variability in expression. Moreover, the intermingled anatomy of neuronal trees is challenging for existing segmentation algorithms. Here, we propose a method to automate the segmentation of neurons in such (potentially pseudo-colored) images. The method uses spatio-color relations between the voxels, generates supervoxels to reduce the problem size by four orders of magnitude before the final segmentation, and is parallelizable over the supervoxels. 
To quantify performance and gain insight, we generate simulated images, where the noise level and characteristics, the density of expression, and the number of fluorophore types are variable. We also present segmentations of real Brainbow images of the mouse hippocampus, which reveal many of the dendritic segments.", "bibtex": "@inproceedings{NIPS2016_7cce53cf,\n author = {S\\\"{u}mb\\\"{u}l, Uygar and Roossien, Douglas and Cai, Dawen and Chen, Fei and Barry, Nicholas and Cunningham, John P and Boyden, Edward and Paninski, Liam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Automated scalable segmentation of neurons from multispectral images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7cce53cf90577442771720a370c3c723-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7cce53cf90577442771720a370c3c723-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7cce53cf90577442771720a370c3c723-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7cce53cf90577442771720a370c3c723-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7cce53cf90577442771720a370c3c723-Reviews.html", "metareview": "", "pdf_size": 9853498, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15946703348182655417&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;;;;;", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7cce53cf90577442771720a370c3c723-Abstract.html" }, { "title": "Automatic Neuron Detection in Calcium Imaging Data Using Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7265", "id": "7265", "author_site": "Noah Apthorpe, Alexander Riordan, Robert Aguilar, Jan Homann, Yi Gu, David Tank, H. Sebastian Seung", "author": "Noah Apthorpe; Alexander Riordan; Robert Aguilar; Jan Homann; Yi Gu; David Tank; H. Sebastian Seung", "abstract": "Calcium imaging is an important technique for monitoring the activity of thousands of neurons simultaneously. As calcium imaging datasets grow in size, automated detection of individual neurons is becoming important. Here we apply a supervised learning approach to this problem and show that convolutional networks can achieve near-human accuracy and superhuman speed. Accuracy is superior to the popular PCA/ICA method based on precision and recall relative to ground truth annotation by a human expert. These results suggest that convolutional networks are an efficient and flexible tool for the analysis of large-scale calcium imaging data.", "bibtex": "@inproceedings{NIPS2016_0771fc6f,\n author = {Apthorpe, Noah and Riordan, Alexander and Aguilar, Robert and Homann, Jan and Gu, Yi and Tank, David and Seung, H. Sebastian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Automatic Neuron Detection in Calcium Imaging Data Using Convolutional Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Reviews.html", "metareview": "", "pdf_size": 5977998, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11095627882830112908&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Computer Science Department; Princeton Neuroscience Institute; Computer Science Department; Princeton Neuroscience Institute; Princeton Neuroscience Institute; Princeton Neuroscience Institute; Computer Science Department+Princeton Neuroscience Institute", "aff_domain": "princeton.edu;princeton.edu; ;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu; ;princeton.edu;princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0771fc6f0f4b1d7d1bb73bbbe14e0e31-Abstract.html", "aff_unique_index": "0;1;0;1;1;1;0+1", "aff_unique_norm": "Computer Science Department;Princeton University", "aff_unique_dep": "Computer Science;Princeton Neuroscience Institute", "aff_unique_url": ";https://www.princeton.edu", "aff_unique_abbr": ";Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1;1;1", "aff_country_unique": ";United States" }, { "title": "Average-case hardness of RIP certification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7389", "id": "7389", "author_site": "Tengyao Wang, Quentin Berthet, Yaniv Plan", "author": "Tengyao Wang; Quentin Berthet; Yaniv Plan", "abstract": "The restricted isometry property (RIP) for design matrices gives guarantees for optimal recovery in sparse linear models. It is of high interest in compressed sensing and statistical learning. This property is particularly important for computationally efficient recovery methods. As a consequence, even though it is in general NP-hard to check that RIP holds, there have been substantial efforts to find tractable proxies for it. These would allow the construction of RIP matrices and the polynomial-time verification of RIP given an arbitrary matrix. We consider the framework of average-case certifiers, which never wrongly declare that a matrix is RIP, while often being correct for random instances. While there are such functions which are tractable in a suboptimal parameter regime, we show that this is a computationally hard task in any better regime. Our results are based on a new, weaker assumption on the problem of detecting dense subgraphs.", "bibtex": "@inproceedings{NIPS2016_d54e99a6,\n author = {Wang, Tengyao and Berthet, Quentin and Plan, Yaniv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Average-case hardness of RIP certification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d54e99a6c03704e95e6965532dec148b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d54e99a6c03704e95e6965532dec148b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d54e99a6c03704e95e6965532dec148b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d54e99a6c03704e95e6965532dec148b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d54e99a6c03704e95e6965532dec148b-Reviews.html", "metareview": "", "pdf_size": 329247, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18017488603721739885&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Centre for Mathematical Sciences, Cambridge, CB3 0WB, United Kingdom; Centre for Mathematical Sciences, Cambridge, CB3 0WB, United Kingdom; 1986 Mathematics Road, Vancouver BC V6T 1Z2, Canada", "aff_domain": "statslab.cam.ac.uk;statslab.cam.ac.uk;math.ubc.ca", "email": "statslab.cam.ac.uk;statslab.cam.ac.uk;math.ubc.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d54e99a6c03704e95e6965532dec148b-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Cambridge;University of British Columbia", "aff_unique_dep": "Centre for Mathematical Sciences;", "aff_unique_url": "https://www.cam.ac.uk;https://www.ubc.ca", "aff_unique_abbr": "Cambridge;UBC", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Cambridge;Vancouver", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Avoiding Imposters and Delinquents: Adversarial Crowdsourcing and Peer Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7254", "id": "7254", "author_site": "Jacob Steinhardt, Gregory Valiant, Moses Charikar", "author": "Jacob Steinhardt; Gregory Valiant; Moses Charikar", "abstract": "We consider a crowdsourcing model in which n workers are asked to rate the quality of n items previously generated by other workers. An unknown set of $\\alpha n$ workers generate reliable ratings, while the remaining workers may behave arbitrarily and possibly adversarially. The manager of the experiment can also manually evaluate the quality of a small number of items, and wishes to curate together almost all of the high-quality items with at most an $\\epsilon$ fraction of low-quality items. Perhaps surprisingly, we show that this is possible with an amount of work required of the manager, and each worker, that does not scale with n: the dataset can be curated with $\\tilde{O}(1/\\beta\\alpha\\epsilon^4)$ ratings per worker, and $\\tilde{O}(1/\\beta\\epsilon^2)$ ratings by the manager, where $\\beta$ is the fraction of high-quality items. Our results extend to the more general setting of peer prediction, including peer grading in online classrooms.", "bibtex": "@inproceedings{NIPS2016_0a87257e,\n author = {Steinhardt, Jacob and Valiant, Gregory and Charikar, Moses},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Avoiding Imposters and Delinquents: Adversarial Crowdsourcing and Peer Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0a87257e5308197df43230edf4ad1dae-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0a87257e5308197df43230edf4ad1dae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0a87257e5308197df43230edf4ad1dae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0a87257e5308197df43230edf4ad1dae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0a87257e5308197df43230edf4ad1dae-Reviews.html", "metareview": "", "pdf_size": 290476, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11135391859213229063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0a87257e5308197df43230edf4ad1dae-Abstract.html" }, { "title": "Backprop KF: Learning Discriminative Deterministic State Estimators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7347", "id": "7347", "author_site": "Tuomas Haarnoja, Anurag Ajay, Sergey Levine, Pieter Abbeel", "author": "Tuomas Haarnoja; Anurag Ajay; Sergey Levine; Pieter Abbeel", "abstract": "Generative state estimators based on probabilistic filters and smoothers are one of the most popular classes of state estimators for robots and autonomous vehicles. However, generative models have limited capacity to handle rich sensory observations, such as camera images, since they must model the entire distribution over sensor readings. Discriminative models do not suffer from this limitation, but are typically more complex to train as latent variable models for state estimation. We present an alternative approach where the parameters of the latent state distribution are directly optimized as a deterministic computation graph, resulting in a simple and effective gradient descent algorithm for training discriminative state estimators. We show that this procedure can be used to train state estimators that use complex input, such as raw camera images, which must be processed using expressive nonlinear function approximators such as convolutional neural networks. Our model can be viewed as a type of recurrent neural network, and the connection to probabilistic filtering allows us to design a network architecture that is particularly well suited for state estimation. We evaluate our approach on a synthetic tracking task with raw image inputs and on the visual odometry task in the KITTI dataset. The results show significant improvement over both standard generative approaches and regular recurrent neural networks.", "bibtex": "@inproceedings{NIPS2016_697e382c,\n author = {Haarnoja, Tuomas and Ajay, Anurag and Levine, Sergey and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Backprop KF: Learning Discriminative Deterministic State Estimators},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/697e382cfd25b07a3e62275d3ee132b3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/697e382cfd25b07a3e62275d3ee132b3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/697e382cfd25b07a3e62275d3ee132b3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/697e382cfd25b07a3e62275d3ee132b3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/697e382cfd25b07a3e62275d3ee132b3-Reviews.html", "metareview": "", "pdf_size": 909680, "gs_citation": 267, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16234950957356407408&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science, University of California, Berkeley; Department of Computer Science, University of California, Berkeley; Department of Computer Science, University of California, Berkeley; Department of Computer Science, University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/697e382cfd25b07a3e62275d3ee132b3-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Balancing Suspense and Surprise: Timely Decision Making with Endogenous Information Acquisition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7274", "id": "7274", "author_site": "Ahmed M. Alaa, Mihaela van der Schaar", "author": "Ahmed M. Alaa; Mihaela van der Schaar", "abstract": "We develop a Bayesian model for decision-making under time pressure with endogenous information acquisition. In our model, the decision-maker decides when to observe (costly) information by sampling an underlying continuous-time stochastic process (time series) that conveys information about the potential occurrence/non-occurrence of an adverse event which will terminate the decision-making process. In her attempt to predict the occurrence of the adverse event, the decision-maker follows a policy that determines when to acquire information from the time series (continuation), and when to stop acquiring information and make a final prediction (stopping). We show that the optimal policy has a \"rendezvous\" structure, i.e. a structure in which whenever a new information sample is gathered from the time series, the optimal \"date\" for acquiring the next sample becomes computable. The optimal interval between two information samples balances a trade-off between the decision maker\u2019s \"surprise\", i.e. the drift in her posterior belief after observing new information, and \"suspense\", i.e. the probability that the adverse event occurs in the time interval between two information samples. 
Moreover, we characterize the continuation and stopping regions in the decision-maker\u2019s state-space, and show that they depend not only on the decision-maker\u2019s beliefs, but also on the \"context\", i.e. the current realization of the time series.", "bibtex": "@inproceedings{NIPS2016_fe70c368,\n author = {Alaa, Ahmed M. and van der Schaar, Mihaela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Balancing Suspense and Surprise: Timely Decision Making with Endogenous Information Acquisition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fe70c36866add1572a8e2b96bfede7bf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fe70c36866add1572a8e2b96bfede7bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fe70c36866add1572a8e2b96bfede7bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fe70c36866add1572a8e2b96bfede7bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fe70c36866add1572a8e2b96bfede7bf-Reviews.html", "metareview": "", "pdf_size": 245301, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5118637046181250142&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Electrical Engineering Department, University of California, Los Angeles; Electrical Engineering Department, University of California, Los Angeles", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fe70c36866add1572a8e2b96bfede7bf-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Electrical Engineering Department", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Barzilai-Borwein Step Size for Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7382", "id": "7382", "author_site": "Conghui Tan, Shiqian Ma, Yu-Hong Dai, Yuqiu Qian", "author": "Conghui Tan; Shiqian Ma; Yu-Hong Dai; Yuqiu Qian", "abstract": "One of the major issues in stochastic gradient descent (SGD) methods is how to choose an appropriate step size while running the algorithm. Since the traditional line search technique does not apply to stochastic optimization methods, the common practice in SGD is either to use a diminishing step size, or to tune a step size by hand, which can be time-consuming in practice. In this paper, we propose to use the Barzilai-Borwein (BB) method to automatically compute step sizes for SGD and its variant, the stochastic variance reduced gradient (SVRG) method, which leads to two algorithms: SGD-BB and SVRG-BB. We prove that SVRG-BB converges linearly for strongly convex objective functions. As a by-product, we prove the linear convergence result of SVRG with Option I proposed in [10], whose convergence result has been missing in the literature. 
Numerical experiments on standard data sets show that the performance of SGD-BB and SVRG-BB is comparable to, and sometimes even better than, that of SGD and SVRG with best-tuned step sizes, and is superior to some advanced SGD variants.", "bibtex": "@inproceedings{NIPS2016_c86a7ee3,\n author = {Tan, Conghui and Ma, Shiqian and Dai, Yu-Hong and Qian, Yuqiu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Barzilai-Borwein Step Size for Stochastic Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c86a7ee3d8ef0b551ed58e354a836f2b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c86a7ee3d8ef0b551ed58e354a836f2b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c86a7ee3d8ef0b551ed58e354a836f2b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c86a7ee3d8ef0b551ed58e354a836f2b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c86a7ee3d8ef0b551ed58e354a836f2b-Reviews.html", "metareview": "", "pdf_size": 671292, "gs_citation": 248, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9984287137700373945&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "The Chinese University of Hong Kong; The Chinese University of Hong Kong; Chinese Academy of Sciences, Beijing, China; The University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;se.cuhk.edu.hk;lsec.cc.ac.cn;connect.hku.hk", "email": "se.cuhk.edu.hk;se.cuhk.edu.hk;lsec.cc.ac.cn;connect.hku.hk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c86a7ee3d8ef0b551ed58e354a836f2b-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Chinese University of Hong Kong;Chinese Academy of Sciences;University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.cas.cn;https://www.hku.hk", "aff_unique_abbr": "CUHK;CAS;HKU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Batched Gaussian Process Bandit Optimization via Determinantal Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7162", "id": "7162", "author_site": "Tarun Kathuria, Amit Deshpande, Pushmeet Kohli", "author": "Tarun Kathuria; Amit Deshpande; Pushmeet Kohli", "abstract": "Gaussian Process bandit optimization has emerged as a powerful tool for optimizing noisy black box functions. One example in machine learning is hyper-parameter optimization where each evaluation of the target function may require training a model which may involve days or even weeks of computation. Most methods for this so-called \u201cBayesian optimization\u201d only allow sequential exploration of the parameter space. However, it is often desirable to propose batches or sets of parameter values to explore simultaneously, especially when there are large parallel processing facilities at our disposal. Batch methods require modeling the interaction between the different evaluations in the batch, which can be expensive in complex scenarios. 
In this paper, we propose a new approach for parallelizing Bayesian optimization by modeling the diversity of a batch via Determinantal point processes (DPPs) whose kernels are learned automatically. This allows us to generalize a previous result as well as prove better regret bounds based on DPP sampling. Our experiments on a variety of synthetic and real-world robotics and hyper-parameter optimization tasks indicate that our DPP-based methods, especially those based on DPP sampling, outperform state-of-the-art methods.", "bibtex": "@inproceedings{NIPS2016_a1d7311f,\n author = {Kathuria, Tarun and Deshpande, Amit and Kohli, Pushmeet},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Batched Gaussian Process Bandit Optimization via Determinantal Point Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a1d7311f2a312426d710e1c617fcbc8c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a1d7311f2a312426d710e1c617fcbc8c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a1d7311f2a312426d710e1c617fcbc8c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a1d7311f2a312426d710e1c617fcbc8c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a1d7311f2a312426d710e1c617fcbc8c-Reviews.html", "metareview": "", "pdf_size": 825990, "gs_citation": 109, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1651712525124758806&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Microsoft Research; Microsoft Research; Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a1d7311f2a312426d710e1c617fcbc8c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian Intermittent Demand Forecasting for Large Inventories", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7225", "id": "7225", "author_site": "Matthias W Seeger, David Salinas, Valentin Flunkert", "author": "Matthias W Seeger; David Salinas; Valentin Flunkert", "abstract": "We present a scalable and robust Bayesian method for demand forecasting in the context of a large e-commerce platform, paying special attention to intermittent and bursty target statistics. Inference is approximated by the Newton-Raphson algorithm, reduced to linear-time Kalman smoothing, which allows us to operate on several orders of magnitude larger problems than previous related work. In a study on large real-world sales datasets, our method outperforms competing approaches on fast and medium moving items.", "bibtex": "@inproceedings{NIPS2016_03255088,\n author = {Seeger, Matthias W and Salinas, David and Flunkert, Valentin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Intermittent Demand Forecasting for Large Inventories},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/03255088ed63354a54e0e5ed957e9008-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/03255088ed63354a54e0e5ed957e9008-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/03255088ed63354a54e0e5ed957e9008-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/03255088ed63354a54e0e5ed957e9008-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/03255088ed63354a54e0e5ed957e9008-Reviews.html", "metareview": "", "pdf_size": 635987, "gs_citation": 140, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3741212165034993508&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/03255088ed63354a54e0e5ed957e9008-Abstract.html" }, { "title": "Bayesian Optimization for Probabilistic Programs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7343", "id": "7343", "author_site": "Thomas Rainforth, Tuan Anh Le, Jan-Willem van de Meent, Michael A Osborne, Frank Wood", "author": "Tom Rainforth; Tuan Anh Le; Jan-Willem van de Meent; Michael A Osborne; Frank Wood", "abstract": "We present the first general purpose framework for marginal maximum a posteriori estimation of probabilistic program variables. By using a series of code transformations, the evidence of any probabilistic program, and therefore of any graphical model, can be optimized with respect to an arbitrary subset of its sampled variables. To carry out this optimization, we develop the first Bayesian optimization package to directly exploit the source code of its target, leading to innovations in problem-independent hyperpriors, unbounded optimization, and implicit constraint satisfaction; delivering significant performance improvements over prominent existing packages. We present applications of our method to a number of tasks including engineering design and parameter optimization.", "bibtex": "@inproceedings{NIPS2016_31fefc0e,\n author = {Rainforth, Tom and Le, Tuan Anh and van de Meent, Jan-Willem and Osborne, Michael A and Wood, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Optimization for Probabilistic Programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/31fefc0e570cb3860f2a6d4b38c6490d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/31fefc0e570cb3860f2a6d4b38c6490d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/31fefc0e570cb3860f2a6d4b38c6490d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/31fefc0e570cb3860f2a6d4b38c6490d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/31fefc0e570cb3860f2a6d4b38c6490d-Reviews.html", "metareview": "", "pdf_size": 3437484, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7470114564585155610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Department of Engineering Science, University of Oxford; Department of Engineering Science, University of Oxford; College of Computer and Information Science, Northeastern University; Department of Engineering Science, University of Oxford; Department of Engineering Science, University of Oxford", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;northeastern.edu;robots.ox.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk;northeastern.edu;robots.ox.ac.uk;robots.ox.ac.uk", "github": "http://www.github.com/probprog/bopp/", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/31fefc0e570cb3860f2a6d4b38c6490d-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Oxford;Northeastern University", "aff_unique_dep": "Department of Engineering Science;College of Computer and Information Science", "aff_unique_url": "https://www.ox.ac.uk;https://www.northeastern.edu", "aff_unique_abbr": "Oxford;NU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "title": "Bayesian Optimization with Robust Bayesian Neural Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7420", "id": "7420", "author_site": "Jost Tobias Springenberg, Aaron Klein, Stefan Falkner, Frank Hutter", "author": "Jost Tobias Springenberg; Aaron Klein; Stefan Falkner; Frank Hutter", "abstract": "Bayesian optimization is a prominent method for optimizing expensive-to-evaluate black-box functions that is widely applied to tuning the hyperparameters of machine learning algorithms. Despite its successes, the prototypical Bayesian optimization approach - using Gaussian process models - does not scale well to either many hyperparameters or many function evaluations. Attacking this lack of scalability and flexibility is thus one of the key challenges of the field. We present a general approach for using flexible parametric models (neural networks) for Bayesian optimization, staying as close to a truly Bayesian treatment as possible. We obtain scalability through stochastic gradient Hamiltonian Monte Carlo, whose robustness we improve via a scale adaptation. 
Experiments including multi-task Bayesian optimization with 21 tasks, parallel optimization of deep neural networks and deep reinforcement learning show the power and flexibility of this approach.", "bibtex": "@inproceedings{NIPS2016_a96d3afe,\n author = {Springenberg, Jost Tobias and Klein, Aaron and Falkner, Stefan and Hutter, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Optimization with Robust Bayesian Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a96d3afec184766bfeca7a9f989fc7e7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a96d3afec184766bfeca7a9f989fc7e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a96d3afec184766bfeca7a9f989fc7e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a96d3afec184766bfeca7a9f989fc7e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a96d3afec184766bfeca7a9f989fc7e7-Reviews.html", "metareview": "", "pdf_size": 2273332, "gs_citation": 593, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=524651660795308665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Computer Science, University of Freiburg; Department of Computer Science, University of Freiburg; Department of Computer Science, University of Freiburg; Department of Computer Science, University of Freiburg", "aff_domain": "cs.uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de", "email": "cs.uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de", "github": "https://github.com/automl/RoBO", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a96d3afec184766bfeca7a9f989fc7e7-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Freiburg", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.uni-freiburg.de", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Bayesian Optimization with a Finite Budget: An Approximate Dynamic Programming Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7041", "id": "7041", "author_site": "Remi Lam, Karen Willcox, David Wolpert", "author": "Remi Lam; Karen Willcox; David H. Wolpert", "abstract": "We consider the problem of optimizing an expensive objective function when a finite budget of total evaluations is prescribed. In that context, the optimal solution strategy for Bayesian optimization can be formulated as a dynamic programming instance. This results in a complex problem with uncountable, dimension-increasing state space and an uncountable control space. We show how to approximate the solution of this dynamic programming problem using rollout, and propose rollout heuristics specifically designed for the Bayesian optimization setting. 
We present numerical experiments showing that the resulting algorithm for optimization with a finite budget outperforms several popular Bayesian optimization algorithms.", "bibtex": "@inproceedings{NIPS2016_5ea1649a,\n author = {Lam, Remi and Willcox, Karen and Wolpert, David H.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian Optimization with a Finite Budget: An Approximate Dynamic Programming Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5ea1649a31336092c05438df996a3e59-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5ea1649a31336092c05438df996a3e59-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5ea1649a31336092c05438df996a3e59-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5ea1649a31336092c05438df996a3e59-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5ea1649a31336092c05438df996a3e59-Reviews.html", "metareview": "", "pdf_size": 412446, "gs_citation": 146, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15399206020103536078&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Massachusetts Institute of Technology; Massachusetts Institute of Technology; Santa Fe Institute", "aff_domain": "mit.edu;mit.edu;santafe.edu", "email": "mit.edu;mit.edu;santafe.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5ea1649a31336092c05438df996a3e59-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Santa Fe Institute", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.santafe.edu", "aff_unique_abbr": "MIT;SFI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian latent structure discovery from multi-neuron recordings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7363", "id": "7363", "author_site": "Scott Linderman, Ryan Adams, Jonathan Pillow", "author": "Scott Linderman; Ryan P. Adams; Jonathan W Pillow", "abstract": "Neural circuits contain heterogeneous groups of neurons that differ in type, location, connectivity, and basic response properties. However, traditional methods for dimensionality reduction and clustering are ill-suited to recovering the structure underlying the organization of neural circuits. In particular, they do not take advantage of the rich temporal dependencies in multi-neuron recordings and fail to account for the noise in neural spike trains. Here we describe new tools for inferring latent structure from simultaneously recorded spike train data using a hierarchical extension of a multi-neuron point process model commonly known as the generalized linear model (GLM). Our approach combines the GLM with flexible graph-theoretic priors governing the relationship between latent features and neural connectivity patterns. Fully Bayesian inference via P\u00f3lya-gamma augmentation of the resulting model allows us to classify neurons and infer latent dimensions of circuit organization from correlated spike trains. 
We demonstrate the effectiveness of our method with applications to synthetic data and multi-neuron recordings in primate retina, revealing latent patterns of neural types and locations from spike trains alone.", "bibtex": "@inproceedings{NIPS2016_708f3cf8,\n author = {Linderman, Scott and Adams, Ryan P and Pillow, Jonathan W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian latent structure discovery from multi-neuron recordings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/708f3cf8100d5e71834b1db77dfa15d6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/708f3cf8100d5e71834b1db77dfa15d6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/708f3cf8100d5e71834b1db77dfa15d6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/708f3cf8100d5e71834b1db77dfa15d6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/708f3cf8100d5e71834b1db77dfa15d6-Reviews.html", "metareview": "", "pdf_size": 4699236, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10427066892055131998&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Columbia University; Harvard University + Twitter; Princeton University", "aff_domain": "columbia.edu;seas.harvard.edu;princeton.edu", "email": "columbia.edu;seas.harvard.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/708f3cf8100d5e71834b1db77dfa15d6-Abstract.html", "aff_unique_index": "0;1+2;3", "aff_unique_norm": "Columbia University;Harvard University;Twitter, Inc.;Princeton University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.columbia.edu;https://www.harvard.edu;https://twitter.com;https://www.princeton.edu", "aff_unique_abbr": "Columbia;Harvard;Twitter;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0", "aff_country_unique": "United States" }, { "title": "Bayesian optimization for automated model selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7198", "id": "7198", "author_site": "Gustavo Malkomes, Charles Schaff, Roman Garnett", "author": "Gustavo Malkomes; Charles Schaff; Roman Garnett", "abstract": "Despite the success of kernel-based nonparametric methods, kernel selection still requires considerable expertise, and is often described as a \u201cblack art.\u201d We present a sophisticated method for automatically searching for an appropriate kernel from an infinite space of potential choices. Previous efforts in this direction have focused on traversing a kernel grammar, only examining the data via computation of marginal likelihood. Our proposed search method is based on Bayesian optimization in model space, where we reason about model evidence as a function to be maximized. We explicitly reason about the data distribution and how it induces similarity between potential model choices in terms of the explanations they can offer for observed data. In this light, we construct a novel kernel between models to explain a given dataset. 
Our method is capable of finding a model that explains a given dataset well without any human assistance, often with fewer computations of model evidence than previous approaches, a claim we demonstrate empirically.", "bibtex": "@inproceedings{NIPS2016_3bbfdde8,\n author = {Malkomes, Gustavo and Schaff, Charles and Garnett, Roman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian optimization for automated model selection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3bbfdde8842a5c44a0323518eec97cbe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3bbfdde8842a5c44a0323518eec97cbe-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3bbfdde8842a5c44a0323518eec97cbe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3bbfdde8842a5c44a0323518eec97cbe-Reviews.html", "metareview": "", "pdf_size": 367248, "gs_citation": 159, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=705742059498051610&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science and Engineering, Washington University in St. Louis; Department of Computer Science and Engineering, Washington University in St. Louis; Department of Computer Science and Engineering, Washington University in St. Louis", "aff_domain": "wustl.edu;wustl.edu;wustl.edu", "email": "wustl.edu;wustl.edu;wustl.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Washington University in St. Louis", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://wustl.edu", "aff_unique_abbr": "WashU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "St. Louis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bayesian optimization under mixed constraints with a slack-variable augmented Lagrangian", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7261", "id": "7261", "author_site": "Victor Picheny, Robert B Gramacy, Stefan Wild, Sebastien Le Digabel", "author": "Victor Picheny; Robert B. Gramacy; Stefan Wild; Sebastien Le Digabel", "abstract": "An augmented Lagrangian (AL) can convert a constrained optimization problem into a sequence of simpler (e.g., unconstrained) problems which are then usually solved with local solvers. Recently, surrogate-based Bayesian optimization (BO) sub-solvers have been successfully deployed in the AL framework for a more global search in the presence of inequality constraints; however a drawback was that expected improvement (EI) evaluations relied on Monte Carlo. Here we introduce an alternative slack variable AL, and show that in this formulation the EI may be evaluated with library routines. The slack variables furthermore facilitate equality as well as inequality constraints, and mixtures thereof. We show our new slack \"ALBO\" compares favorably to the original. 
Its superiority over conventional alternatives is reinforced on several new mixed constraint examples.", "bibtex": "@inproceedings{NIPS2016_31839b03,\n author = {Picheny, Victor and Gramacy, Robert B and Wild, Stefan and Le Digabel, Sebastien},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bayesian optimization under mixed constraints with a slack-variable augmented Lagrangian},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/31839b036f63806cba3f47b93af8ccb5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/31839b036f63806cba3f47b93af8ccb5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/31839b036f63806cba3f47b93af8ccb5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/31839b036f63806cba3f47b93af8ccb5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/31839b036f63806cba3f47b93af8ccb5-Reviews.html", "metareview": "", "pdf_size": 383294, "gs_citation": 136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16495590784822835370&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "MIAT, Universit\u00e9 de Toulouse, INRA; Virginia Tech; Argonne National Laboratory; \u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": "toulouse.inra.fr;vt.edu;mcs.anl.gov;polymtl.ca", "email": "toulouse.inra.fr;vt.edu;mcs.anl.gov;polymtl.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/31839b036f63806cba3f47b93af8ccb5-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Toulouse;Virginia Tech;Argonne National Laboratory;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": "MIAT;;;", "aff_unique_url": "https://www.univ-toulouse.fr;https://www.vt.edu;https://www.anl.gov;https://www.polymtl.ca", "aff_unique_abbr": "UT;VT;ANL;Polytechnique Montr\u00e9al", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "France;United States;Canada" }, { "title": "Beyond Exchangeability: The Chinese Voting Process", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7423", "id": "7423", "author_site": "Moontae Lee, Seok Hyun Jin, David Mimno", "author": "Moontae Lee; Seok Hyun Jin; David Mimno", "abstract": "Many online communities present user-contributed responses, such as reviews of products and answers to questions. User-provided helpfulness votes can highlight the most useful responses, but voting is a social process that can gain momentum based on the popularity of responses and the polarity of existing votes. We propose the Chinese Voting Process (CVP) which models the evolution of helpfulness votes as a self-reinforcing process dependent on position and presentation biases. We evaluate this model on Amazon product reviews and more than 80 StackExchange forums, measuring the intrinsic quality of individual responses and behavioral coefficients of different communities.", "bibtex": "@inproceedings{NIPS2016_33bb8372,\n author = {Lee, Moontae and Jin, Seok Hyun and Mimno, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Beyond Exchangeability: The Chinese Voting Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/33bb83720ba9d2b6da87114380314af5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/33bb83720ba9d2b6da87114380314af5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/33bb83720ba9d2b6da87114380314af5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/33bb83720ba9d2b6da87114380314af5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/33bb83720ba9d2b6da87114380314af5-Reviews.html", "metareview": "", "pdf_size": 853721, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8550536739468733462&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Dept. of Computer Science, Cornell University; Dept. of Computer Science, Cornell University; Dept. of Information Science, Cornell University", "aff_domain": "cs.cornell.edu;cornell.edu;cornell.edu", "email": "cs.cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/33bb83720ba9d2b6da87114380314af5-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bi-Objective Online Matching and Submodular Allocations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7056", "id": "7056", "author_site": "Hossein Esfandiari, Nitish Korula, Vahab Mirrokni", "author": "Hossein Esfandiari; Nitish Korula; Vahab Mirrokni", "abstract": "Online allocation problems have been widely studied due to their numerous practical applications (particularly to Internet advertising), as well as considerable theoretical interest. The main challenge in such problems is making assignment decisions in the face of uncertainty about future input; effective algorithms need to predict which constraints are most likely to bind, and learn the balance between short-term gain and the value of long-term resource availability. In many important applications, the algorithm designer is faced with multiple objectives to optimize. In particular, in online advertising it is fairly common to optimize multiple metrics, such as clicks, conversions, and impressions, as well as other metrics which may be largely uncorrelated such as \u2018share of voice\u2019, and \u2018buyer surplus\u2019. While there has been considerable work on multi-objective offline optimization (when the entire input is known in advance), very little is known about the online case, particularly in the case of adversarial input. In this paper, we give the first results for bi-objective online submodular optimization, providing almost matching upper and lower bounds for allocating items to agents with two submodular value functions. We also study practically relevant special cases of this problem related to Internet advertising, and obtain improved results. 
All our algorithms are nearly best possible, as well as being efficient and easy to implement in practice.", "bibtex": "@inproceedings{NIPS2016_09662890,\n author = {Esfandiari, Hossein and Korula, Nitish and Mirrokni, Vahab},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bi-Objective Online Matching and Submodular Allocations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0966289037ad9846c5e994be2a91bafa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0966289037ad9846c5e994be2a91bafa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0966289037ad9846c5e994be2a91bafa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0966289037ad9846c5e994be2a91bafa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0966289037ad9846c5e994be2a91bafa-Reviews.html", "metareview": "", "pdf_size": 321931, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8271592666930386634&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Maryland; Google Research; Google Research", "aff_domain": "cs.umd.edu;google.com;google.com", "email": "cs.umd.edu;google.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0966289037ad9846c5e994be2a91bafa-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Maryland;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.umd.edu;https://research.google", "aff_unique_abbr": "UMD;Google Research", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Binarized Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6910", "id": "6910", "author_site": "Itay Hubara, Matthieu Courbariaux, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio", "author": "Itay Hubara; Matthieu Courbariaux; Daniel Soudry; Ran El-Yaniv; Yoshua Bengio", "abstract": "We introduce a method to train Binarized Neural Networks (BNNs) - neural networks with binary weights and activations at run-time. At train-time the binary weights and activations are used for computing the parameter gradients. During the forward pass, BNNs drastically reduce memory size and accesses, and replace most arithmetic operations with bit-wise operations, which is expected to substantially improve power-efficiency. To validate the effectiveness of BNNs, we conducted two sets of experiments on the Torch7 and Theano frameworks. On both, BNNs achieved nearly state-of-the-art results over the MNIST, CIFAR-10 and SVHN datasets. We also report our preliminary results on the challenging ImageNet dataset. Last but not least, we wrote a binary matrix multiplication GPU kernel with which it is possible to run our MNIST BNN 7 times faster than with an unoptimized GPU kernel, without suffering any loss in classification accuracy. 
The code for training and running our BNNs is available on-line.", "bibtex": "@inproceedings{NIPS2016_d8330f85,\n author = {Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Binarized Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d8330f857a17c53d217014ee776bfd50-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d8330f857a17c53d217014ee776bfd50-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d8330f857a17c53d217014ee776bfd50-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d8330f857a17c53d217014ee776bfd50-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d8330f857a17c53d217014ee776bfd50-Reviews.html", "metareview": "", "pdf_size": 289171, "gs_citation": 2841, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=84426770746747327&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Technion, Israel Institute of Technology; Universit\u00e9 de Montr\u00e9al; Columbia University; Technion, Israel Institute of Technology; Universit\u00e9 de Montr\u00e9al+CIFAR Senior Fellow", "aff_domain": "technion.ac.il;gmail.com;gmail.com;cs.technion.ac.il;gmail.com", "email": "technion.ac.il;gmail.com;gmail.com;cs.technion.ac.il;gmail.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d8330f857a17c53d217014ee776bfd50-Abstract.html", "aff_unique_index": "0;1;2;0;1+3", "aff_unique_norm": "Israel Institute of Technology;Universit\u00e9 de Montr\u00e9al;Columbia University;CIFAR", "aff_unique_dep": ";;;Senior Fellow", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.umontreal.ca;https://www.columbia.edu;https://www.cifar.ca", "aff_unique_abbr": "Technion;UdeM;Columbia;CIFAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1+1", "aff_country_unique": "Israel;Canada;United States" }, { "title": "Blazing the trails before beating the path: Sample-efficient Monte-Carlo planning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7226", "id": "7226", "author_site": "Jean-Bastien Grill, Michal Valko, Remi Munos", "author": "Jean-Bastien Grill; Michal Valko; Remi Munos", "abstract": "We study the sampling-based planning problem in Markov decision processes (MDPs) that we can access only through a generative model, usually referred to as Monte-Carlo planning. Our objective is to return a good estimate of the optimal value function at any state while minimizing the number of calls to the generative model, i.e. the sample complexity. We propose a new algorithm, TrailBlazer, able to handle MDPs with a finite or an infinite number of transitions from state-action to next states. TrailBlazer is an adaptive algorithm that exploits possible structures of the MDP by exploring only a subset of states reachable by following near-optimal policies. We provide bounds on its sample complexity that depend on a measure of the quantity of near-optimal states. 
The algorithm behavior can be considered as an extension of Monte-Carlo sampling (for estimating an expectation) to problems that alternate maximization (over actions) and expectation (over next states). Finally, another appealing feature of TrailBlazer is that it is simple to implement and computationally efficient.", "bibtex": "@inproceedings{NIPS2016_7b4773c0,\n author = {Grill, Jean-Bastien and Valko, Michal and Munos, Remi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Blazing the trails before beating the path: Sample-efficient Monte-Carlo planning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7b4773c039d539af17c883eb9283dd14-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7b4773c039d539af17c883eb9283dd14-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7b4773c039d539af17c883eb9283dd14-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7b4773c039d539af17c883eb9283dd14-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7b4773c039d539af17c883eb9283dd14-Reviews.html", "metareview": "", "pdf_size": 406152, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10499654695594681807&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 12, "aff": "SequeL team, INRIA Lille - Nord Europe, France; SequeL team, INRIA Lille - Nord Europe, France; Google DeepMind, UK+SequeL team, INRIA Lille - Nord Europe, France", "aff_domain": "inria.fr;inria.fr;google.com", "email": "inria.fr;inria.fr;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7b4773c039d539af17c883eb9283dd14-Abstract.html", "aff_unique_index": "0;0;1+0", "aff_unique_norm": "INRIA Lille - Nord Europe;Google", "aff_unique_dep": "SequeL team;Google DeepMind", "aff_unique_url": "https://www.inria.fr/en/centre/lille-nord-europe;https://deepmind.com", "aff_unique_abbr": "INRIA;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lille;", "aff_country_unique_index": "0;0;1+0", "aff_country_unique": "France;United Kingdom" }, { "title": "Blind Attacks on Machine Learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7189", "id": "7189", "author_site": "Alex Beatson, Zhaoran Wang, Han Liu", "author": "Alex Beatson; Zhaoran Wang; Han Liu", "abstract": "The importance of studying the robustness of learners to malicious data is well established. While much work has been done establishing both robust estimators and effective data injection attacks when the attacker is omniscient, the ability of an attacker to provably harm learning while having access to little information is largely unstudied. We study the potential of a \u201cblind attacker\u201d to provably limit a learner\u2019s performance by data injection attack without observing the learner\u2019s training set or any parameter of the distribution from which it is drawn. We provide examples of simple yet effective attacks in two settings: firstly, where an \u201cinformed learner\u201d knows the strategy chosen by the attacker, and secondly, where a \u201cblind learner\u201d knows only the proportion of malicious data and some family to which the malicious distribution chosen by the attacker belongs. 
For each attack, we analyze minimax rates of convergence and establish lower bounds on the learner\u2019s minimax risk, exhibiting limits on a learner\u2019s ability to learn under data injection attack even when the attacker is \u201cblind\u201d.", "bibtex": "@inproceedings{NIPS2016_a284df11,\n author = {Beatson, Alex and Wang, Zhaoran and Liu, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Blind Attacks on Machine Learners},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a284df1155ec3e67286080500df36a9a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a284df1155ec3e67286080500df36a9a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a284df1155ec3e67286080500df36a9a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a284df1155ec3e67286080500df36a9a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a284df1155ec3e67286080500df36a9a-Reviews.html", "metareview": "", "pdf_size": 241335, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16443169926590524718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Department of Computer Science, Princeton University; Department of Operations Research and Financial Engineering, Princeton University; Department of Operations Research and Financial Engineering, Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a284df1155ec3e67286080500df36a9a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Blind Regression: Nonparametric Regression for Latent Variable Models via Collaborative Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7072", "id": "7072", "author_site": "Dogyoon Song, Christina Lee, Yihua Li, Devavrat Shah", "author": "Dogyoon Song; Christina E. Lee; Yihua Li; Devavrat Shah", "abstract": "We introduce the framework of {\\em blind regression} motivated by {\\em matrix completion} for recommendation systems: given $m$ users, $n$ movies, and a subset of user-movie ratings, the goal is to predict the unobserved user-movie ratings given the data, i.e., to complete the partially observed matrix. Following the framework of non-parametric statistics, we posit that user $u$ and movie $i$ have features $x_1(u)$ and $x_2(i)$ respectively, and their corresponding rating $y(u,i)$ is a noisy measurement of $f(x_1(u), x_2(i))$ for some unknown function $f$. In contrast with classical regression, the features $x = (x_1(u), x_2(i))$ are not observed, making it challenging to apply standard regression methods to predict the unobserved ratings. Inspired by the classical Taylor's expansion for differentiable functions, we provide a prediction algorithm that is consistent for all Lipschitz functions. 
In fact, the analysis through our framework naturally leads to a variant of collaborative filtering, shedding insight into the widespread success of collaborative filtering in practice. Assuming each entry is sampled independently with probability at least $\\max(m^{-1+\\delta},n^{-1/2+\\delta})$ with $\\delta > 0$, we prove that the expected fraction of our estimates with error greater than $\\epsilon$ is less than $\\gamma^2 / \\epsilon^2$ plus a polynomially decaying term, where $\\gamma^2$ is the variance of the additive entry-wise noise term. Experiments with the MovieLens and Netflix datasets suggest that our algorithm provides principled improvements over basic collaborative filtering and is competitive with matrix factorization methods.", "bibtex": "@inproceedings{NIPS2016_678a1491,\n author = {Song, Dogyoon and Lee, Christina E. and Li, Yihua and Shah, Devavrat},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Blind Regression: Nonparametric Regression for Latent Variable Models via Collaborative Filtering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/678a1491514b7f1006d605e9161946b1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/678a1491514b7f1006d605e9161946b1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/678a1491514b7f1006d605e9161946b1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/678a1491514b7f1006d605e9161946b1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/678a1491514b7f1006d605e9161946b1-Reviews.html", "metareview": "", "pdf_size": 309370, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3715234065896780320&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/678a1491514b7f1006d605e9161946b1-Abstract.html" }, { "title": "Boosting with Abstention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6937", "id": "6937", "author_site": "Corinna Cortes, Giulia DeSalvo, Mehryar Mohri", "author": "Corinna Cortes; Giulia DeSalvo; Mehryar Mohri", "abstract": "We present a new boosting algorithm for the key scenario of binary classification with abstention where the algorithm can abstain from predicting the label of a point, at the price of a fixed cost. At each round, our algorithm selects a pair of functions, a base predictor and a base abstention function. We define convex upper bounds for the natural loss function associated to this problem, which we prove to be calibrated with respect to the Bayes solution. Our algorithm benefits from general margin-based learning guarantees which we derive for ensembles of pairs of base predictor and abstention functions, in terms of the Rademacher complexities of the corresponding function classes. We give convergence guarantees for our algorithm along with a linear-time weak-learning algorithm for abstention stumps. 
We also report the results of several experiments suggesting that our algorithm provides a significant improvement in practice over two confidence-based algorithms.", "bibtex": "@inproceedings{NIPS2016_7634ea65,\n author = {Cortes, Corinna and DeSalvo, Giulia and Mohri, Mehryar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Boosting with Abstention},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7634ea65a4e6d9041cfd3f7de18e334a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7634ea65a4e6d9041cfd3f7de18e334a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7634ea65a4e6d9041cfd3f7de18e334a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7634ea65a4e6d9041cfd3f7de18e334a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7634ea65a4e6d9041cfd3f7de18e334a-Reviews.html", "metareview": "", "pdf_size": 2046638, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13918142828198712191&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Google Research, New York, NY 10011; Courant Institute, New York, NY 10012; Courant Institute and Google, New York, NY 10012", "aff_domain": "google.com;cims.nyu.edu;cims.nyu.edu", "email": "google.com;cims.nyu.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7634ea65a4e6d9041cfd3f7de18e334a-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Google;Courant Institute of Mathematical Sciences", "aff_unique_dep": "Google Research;Mathematical Sciences", "aff_unique_url": "https://research.google;https://courant.nyu.edu", "aff_unique_abbr": "Google;Courant", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Bootstrap Model Aggregation for Distributed Statistical Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6939", "id": "6939", "author_site": "JUN HAN, Qiang Liu", "author": "JUN HAN; Qiang Liu", "abstract": "In distributed, or privacy-preserving learning, we are often given a set of probabilistic models estimated from different local repositories, and asked to combine them into a single model that gives efficient statistical estimation. A simple method is to linearly average the parameters of the local models, which, however, tends to be degenerate or not applicable on non-convex models, or models with different parameter dimensions. One more practical strategy is to generate bootstrap samples from the local models, and then learn a joint model based on the combined bootstrap set. Unfortunately, the bootstrap procedure introduces additional noise and can significantly deteriorate the performance. In this work, we propose two variance reduction methods to correct the bootstrap noise, including a weighted M-estimator that is both statistically efficient and practically powerful. Both theoretical and empirical analysis is provided to demonstrate our methods.", "bibtex": "@inproceedings{NIPS2016_1ce927f8,\n author = {HAN, JUN and Liu, Qiang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Bootstrap Model Aggregation for Distributed Statistical Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1ce927f875864094e3906a4a0b5ece68-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1ce927f875864094e3906a4a0b5ece68-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1ce927f875864094e3906a4a0b5ece68-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1ce927f875864094e3906a4a0b5ece68-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1ce927f875864094e3906a4a0b5ece68-Reviews.html", "metareview": "", "pdf_size": 456643, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12408121168765301633&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Dartmouth College; Department of Computer Science, Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu", "email": "dartmouth.edu;dartmouth.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1ce927f875864094e3906a4a0b5ece68-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Dartmouth College", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://dartmouth.edu", "aff_unique_abbr": "Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Brains on Beats", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7251", "id": "7251", "author_site": "Umut G\u00fc\u00e7l\u00fc, Jordy Thielen, Michael Hanke, Marcel van Gerven", "author": "Umut G\u00fc\u00e7l\u00fc; Jordy Thielen; Michael Hanke; Marcel van Gerven", "abstract": "We developed task-optimized deep neural networks (DNNs) that achieved state-of-the-art performance in different evaluation scenarios for automatic music tagging. These DNNs were subsequently used to probe the neural representations of music. Representational similarity analysis revealed the existence of a representational gradient across the superior temporal gyrus (STG). Anterior STG was shown to be more sensitive to low-level stimulus features encoded in shallow DNN layers whereas posterior STG was shown to be more sensitive to high-level stimulus features encoded in deep DNN layers.", "bibtex": "@inproceedings{NIPS2016_b9d487a3,\n author = {G\\\"{u}\\c{c}l\\\"{u}, Umut and Thielen, Jordy and Hanke, Michael and van Gerven, Marcel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Brains on Beats},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b9d487a30398d42ecff55c228ed5652b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b9d487a30398d42ecff55c228ed5652b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b9d487a30398d42ecff55c228ed5652b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b9d487a30398d42ecff55c228ed5652b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b9d487a30398d42ecff55c228ed5652b-Reviews.html", "metareview": "", "pdf_size": 1792510, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13955153599374598537&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Radboud University, Donders Institute for Brain, Cognition and Behaviour, Nijmegen, the Netherlands; Radboud University, Donders Institute for Brain, Cognition and Behaviour, Nijmegen, the Netherlands; Otto-von-Guericke University Magdeburg, Center for Behavioral Brain Sciences, Magdeburg, Germany; Radboud University, Donders Institute for Brain, Cognition and Behaviour, Nijmegen, the Netherlands", "aff_domain": "donders.ru.nl;psych.ru.nl;ovgu.de;donders.ru.nl", "email": "donders.ru.nl;psych.ru.nl;ovgu.de;donders.ru.nl", "github": "", "project": "http://psychoinformatics.de; http://www.ccnlab.net", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b9d487a30398d42ecff55c228ed5652b-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Radboud University;Otto-von-Guericke University Magdeburg", "aff_unique_dep": "Donders Institute for Brain, Cognition and Behaviour;Center for Behavioral Brain Sciences", "aff_unique_url": "https://www.ru.nl;https://www.ovgu.de", "aff_unique_abbr": "RU;OVGU", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Nijmegen;Magdeburg", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Netherlands;Germany" }, { "title": "Breaking the Bandwidth Barrier: Geometrical Adaptive Entropy Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7292", "id": "7292", "author_site": "Weihao Gao, Sewoong Oh, Pramod Viswanath", "author": "Weihao Gao; Sewoong Oh; Pramod Viswanath", "abstract": "Estimators of information theoretic measures such as entropy and mutual information from samples are a basic workhorse for many downstream applications in modern data science. State of the art approaches have been either geometric (nearest neighbor (NN) based) or kernel based (with bandwidth chosen to be data independent and vanishing sub linearly in the sample size). In this paper we combine both these approaches to design new estimators of entropy and mutual information that strongly outperform all state of the art methods. Our estimator uses bandwidth choice of fixed $k$-NN distances; such a choice is both data dependent and linearly vanishing in the sample size and necessitates a bias cancellation term that is universal and independent of the underlying distribution. As a byproduct, we obtain a unified way of obtaining both kernel and NN estimators. 
The corresponding theoretical contribution relating the geometry of NN distances to asymptotic order statistics is of independent mathematical interest.", "bibtex": "@inproceedings{NIPS2016_459a4ddc,\n author = {Gao, Weihao and Oh, Sewoong and Viswanath, Pramod},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Breaking the Bandwidth Barrier: Geometrical Adaptive Entropy Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/459a4ddcb586f24efd9395aa7662bc7c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/459a4ddcb586f24efd9395aa7662bc7c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/459a4ddcb586f24efd9395aa7662bc7c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/459a4ddcb586f24efd9395aa7662bc7c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/459a4ddcb586f24efd9395aa7662bc7c-Reviews.html", "metareview": "", "pdf_size": 461732, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5624350292610546113&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Coordinated Science Lab and Department of Electrical and Computer Engineering; Coordinated Science Lab and Department of Industrial and Enterprise Systems Engineering; Coordinated Science Lab and Department of Electrical and Computer Engineering", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/459a4ddcb586f24efd9395aa7662bc7c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.cs.uiuc.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Budgeted stream-based active learning via adaptive submodular maximization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7091", "id": "7091", "author_site": "Kaito Fujii, Hisashi Kashima", "author": "Kaito Fujii; Hisashi Kashima", "abstract": "Active learning enables us to reduce the annotation cost by adaptively selecting unlabeled instances to be labeled. For pool-based active learning, several effective methods with theoretical guarantees have been developed through maximizing some utility function satisfying adaptive submodularity. In contrast, there have been few methods for stream-based active learning based on adaptive submodularity. In this paper, we propose a new class of utility functions, policy-adaptive submodular functions, and prove this class includes many existing adaptive submodular functions appearing in real world problems. We provide a general framework based on policy-adaptive submodularity that makes it possible to convert existing pool-based methods to stream-based methods and give theoretical guarantees on their performance. 
In addition we empirically demonstrate their effectiveness comparing with existing heuristics on common benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_07cdfd23,\n author = {Fujii, Kaito and Kashima, Hisashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Budgeted stream-based active learning via adaptive submodular maximization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/07cdfd23373b17c6b337251c22b7ea57-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/07cdfd23373b17c6b337251c22b7ea57-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/07cdfd23373b17c6b337251c22b7ea57-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/07cdfd23373b17c6b337251c22b7ea57-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/07cdfd23373b17c6b337251c22b7ea57-Reviews.html", "metareview": "", "pdf_size": 445377, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=446191886368671298&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Kyoto University + JST, ERATO, Kawarabayashi Large Graph Project; Kyoto University", "aff_domain": "ml.ist.i.kyoto-u.ac.jp;i.kyoto-u.ac.jp", "email": "ml.ist.i.kyoto-u.ac.jp;i.kyoto-u.ac.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/07cdfd23373b17c6b337251c22b7ea57-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "Kyoto University;Japan Science and Technology Agency", "aff_unique_dep": ";Kawarabayashi Large Graph Project", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.jst.go.jp", "aff_unique_abbr": "Kyoto U;JST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0", "aff_country_unique": "Japan" }, { "title": "CMA-ES with Optimal Covariance Update and Storage Complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7277", "id": "7277", "author_site": "Oswin Krause, D\u00eddac Rodr\u00edguez Arbon\u00e8s, Christian Igel", "author": "Oswin Krause; D\u00eddac Rodr\u00edguez Arbon\u00e8s; Christian Igel", "abstract": "The covariance matrix adaptation evolution strategy (CMA-ES) is arguably one of the most powerful real-valued derivative-free optimization algorithms, finding many applications in machine learning. The CMA-ES is a Monte Carlo method, sampling from a sequence of multi-variate Gaussian distributions. Given the function values at the sampled points, updating and storing the covariance matrix dominates the time and space complexity in each iteration of the algorithm. We propose a numerically stable quadratic-time covariance matrix update scheme with minimal memory requirements based on maintaining triangular Cholesky factors. This requires a modification of the cumulative step-size adaption (CSA) mechanism in the CMA-ES, in which we replace the inverse of the square root of the covariance matrix by the inverse of the triangular Cholesky factor. Because the triangular Cholesky factor changes smoothly with the matrix square root, this modification does not change the behavior of the CMA-ES in terms of required objective function evaluations as verified empirically. 
Thus, the described algorithm can and should replace the standard CMA-ES if updating and storing the covariance matrix matters.", "bibtex": "@inproceedings{NIPS2016_289dff07,\n author = {Krause, Oswin and Arbon\\`{e}s, D\\'{\\i}dac Rodr\\'{\\i}guez and Igel, Christian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CMA-ES with Optimal Covariance Update and Storage Complexity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/289dff07669d7a23de0ef88d2f7129e7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/289dff07669d7a23de0ef88d2f7129e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/289dff07669d7a23de0ef88d2f7129e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/289dff07669d7a23de0ef88d2f7129e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/289dff07669d7a23de0ef88d2f7129e7-Reviews.html", "metareview": "", "pdf_size": 565923, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16014270855212247995&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Dept. of Computer Science, University of Copenhagen, Copenhagen, Denmark; Dept. of Computer Science, University of Copenhagen, Copenhagen, Denmark; Dept. of Computer Science, University of Copenhagen, Copenhagen, Denmark", "aff_domain": "di.ku.dk;di.ku.dk;di.ku.dk", "email": "di.ku.dk;di.ku.dk;di.ku.dk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/289dff07669d7a23de0ef88d2f7129e7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "Dept. of Computer Science", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Copenhagen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "title": "CNNpack: Packing Convolutional Neural Networks in the Frequency Domain", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7019", "id": "7019", "author_site": "Yunhe Wang, Chang Xu, Shan You, Dacheng Tao, Chao Xu", "author": "Yunhe Wang; Chang Xu; Shan You; Dacheng Tao; Chao Xu", "abstract": "Deep convolutional neural networks (CNNs) are successfully used in a number of applications. However, their storage and computational requirements have largely prevented their widespread use on mobile devices. Here we present an effective CNN compression approach in the frequency domain, which focuses not only on smaller weights but on all the weights and their underlying connections. By treating convolutional filters as images, we decompose their representations in the frequency domain as common parts (i.e., cluster centers) shared by other similar filters and their individual private parts (i.e., individual residuals). A large number of low-energy frequency coefficients in both parts can be discarded to produce high compression without significantly compromising accuracy. We relax the computational burden of convolution operations in CNNs by linearly combining the convolution responses of discrete cosine transform (DCT) bases. 
The compression and speed-up ratios of the proposed algorithm are thoroughly analyzed and evaluated on benchmark image datasets to demonstrate its superiority over state-of-the-art methods.", "bibtex": "@inproceedings{NIPS2016_36366388,\n author = {Wang, Yunhe and Xu, Chang and You, Shan and Tao, Dacheng and Xu, Chao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CNNpack: Packing Convolutional Neural Networks in the Frequency Domain},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3636638817772e42b59d74cff571fbb3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3636638817772e42b59d74cff571fbb3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3636638817772e42b59d74cff571fbb3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3636638817772e42b59d74cff571fbb3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3636638817772e42b59d74cff571fbb3-Reviews.html", "metareview": "", "pdf_size": 3098603, "gs_citation": 221, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8800655015752811672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Key Laboratory of Machine Perception (MOE), School of EECS, Peking University+Cooperative Medianet Innovation Center, Peking University; Centre for Quantum Computation and Intelligent Systems, School of Software, University of Technology Sydney; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University+Cooperative Medianet Innovation Center, Peking University; Centre for Quantum Computation and Intelligent Systems, School of Software, University of Technology Sydney; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University+Cooperative Medianet Innovation Center, Peking University", "aff_domain": "pku.edu.cn;uts.edu.au;pku.edu.cn;uts.edu.au;cis.pku.edu.cn", "email": "pku.edu.cn;uts.edu.au;pku.edu.cn;uts.edu.au;cis.pku.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3636638817772e42b59d74cff571fbb3-Abstract.html", "aff_unique_index": "0+0;1;0+0;1;0+0", "aff_unique_norm": "Peking University;University of Technology Sydney", "aff_unique_dep": "School of EECS;School of Software", "aff_unique_url": "http://www.pku.edu.cn;https://www.uts.edu.au", "aff_unique_abbr": "PKU;UTS", "aff_campus_unique_index": ";1;;1;", "aff_campus_unique": ";Sydney", "aff_country_unique_index": "0+0;1;0+0;1;0+0", "aff_country_unique": "China;Australia" }, { "title": "CRF-CNN: Modeling Structured Information in Human Pose Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7355", "id": "7355", "author_site": "Xiao Chu, Wanli Ouyang, Hongsheng Li, Xiaogang Wang", "author": "Xiao Chu; Wanli Ouyang; hongsheng Li; Xiaogang Wang", "abstract": "Deep convolutional neural networks (CNN) have achieved great success. On the other hand, modeling structural information has been proved critical in many vision problems. It is of great interest to integrate them effectively. In a classical neural network, there is no message passing between neurons in the same layer. 
In this paper, we propose a CRF-CNN framework which can simultaneously model structural information in both output and hidden feature layers in a probabilistic way, and it is applied to human pose estimation. A message passing scheme is proposed, so that in various layers each body joint receives messages from all the others in an efficient way. Such message passing can be implemented with convolution between feature maps in the same layer, and it is also integrated with feedforward propagation in neural networks. Finally, a neural network implementation enabling end-to-end learning of CRF-CNN is provided. Its effectiveness is demonstrated through experiments on two benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_6f3ef77a,\n author = {Chu, Xiao and Ouyang, Wanli and Li, hongsheng and Wang, Xiaogang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CRF-CNN: Modeling Structured Information in Human Pose Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6f3ef77ac0e3619e98159e9b6febf557-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6f3ef77ac0e3619e98159e9b6febf557-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6f3ef77ac0e3619e98159e9b6febf557-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6f3ef77ac0e3619e98159e9b6febf557-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6f3ef77ac0e3619e98159e9b6febf557-Reviews.html", "metareview": "", "pdf_size": 462955, "gs_citation": 97, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17092830075290809962&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The Chinese University of Hong Kong; The Chinese University of Hong Kong; The Chinese University of Hong Kong; The Chinese University of Hong Kong", "aff_domain": "ee.cuhk.edu.hk;ee.cuhk.edu.hk;ee.cuhk.edu.hk;ee.cuhk.edu.hk", "email": "ee.cuhk.edu.hk;ee.cuhk.edu.hk;ee.cuhk.edu.hk;ee.cuhk.edu.hk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6f3ef77ac0e3619e98159e9b6febf557-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Can Active Memory Replace Attention?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7397", "id": "7397", "author_site": "\u0141ukasz Kaiser, Samy Bengio", "author": "\u0141ukasz Kaiser; Samy Bengio", "abstract": "Several mechanisms to focus the attention of a neural network on selected parts of its input or memory have been used successfully in deep learning models in recent years. Attention has improved image classification, image captioning, speech recognition, generative models, and learning algorithmic tasks, but it had probably the largest impact on neural machine translation. Recently, similar improvements have been obtained using alternative mechanisms that do not focus on a single part of a memory but operate on all of it in parallel, in a uniform way. 
Such a mechanism, which we call active memory, improved over attention in algorithmic tasks, image processing, and generative modelling. So far, however, active memory has not improved over attention for most natural language processing tasks, in particular for machine translation. We analyze this shortcoming in this paper and propose an extended model of active memory that matches existing attention models on neural machine translation and generalizes better to longer sentences. We investigate this model and explain why previous active memory models did not succeed. Finally, we discuss when active memory brings the most benefits and where attention can be a better choice.", "bibtex": "@inproceedings{NIPS2016_fb8feff2,\n author = {Kaiser, \\L ukasz and Bengio, Samy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Can Active Memory Replace Attention?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fb8feff253bb6c834deb61ec76baa893-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fb8feff253bb6c834deb61ec76baa893-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fb8feff253bb6c834deb61ec76baa893-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fb8feff253bb6c834deb61ec76baa893-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fb8feff253bb6c834deb61ec76baa893-Reviews.html", "metareview": "", "pdf_size": 319145, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2625008024985752158&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Google Brain; Google Brain", "aff_domain": "google.com;google.com", "email": "google.com;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fb8feff253bb6c834deb61ec76baa893-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Can Peripheral Representations Improve Clutter Metrics on Complex Scenes?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7111", "id": "7111", "author_site": "Arturo Deza, Miguel Eckstein", "author": "Arturo Deza; Miguel Eckstein", "abstract": "Previous studies have proposed image-based clutter measures that correlate with human search times and/or eye movements. However, most models do not take into account the fact that the effects of clutter interact with the foveated nature of the human visual system: visual clutter further from the fovea has an increasing detrimental influence on perception. Here, we introduce a new foveated clutter model to predict the detrimental effects in target search utilizing a forced fixation search task. We use Feature Congestion (Rosenholtz et al.) as our non-foveated clutter model, and we stack a peripheral architecture on top of Feature Congestion for our foveated model. We introduce the Peripheral Integration Feature Congestion (PIFC) coefficient, as a fundamental ingredient of our model that modulates clutter as a non-linear gain contingent on eccentricity. 
We finally show that Foveated Feature Congestion (FFC) clutter scores (r(44) = \u22120.82 \u00b1 0.04, p < 0.0001) correlate better with target detection (hit rate) than regular Feature Congestion (r(44) = \u22120.19 \u00b1 0.13, p = 0.0774) in forced fixation search; and we extend foveation to other clutter models showing stronger correlations in all cases. Thus, our model allows us to enrich clutter perception research by computing fixation specific clutter maps. Code for building peripheral representations is available.", "bibtex": "@inproceedings{NIPS2016_f4a331b7,\n author = {Deza, Arturo and Eckstein, Miguel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Can Peripheral Representations Improve Clutter Metrics on Complex Scenes?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f4a331b7a22d1b237565d8813a34d8ac-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f4a331b7a22d1b237565d8813a34d8ac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f4a331b7a22d1b237565d8813a34d8ac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f4a331b7a22d1b237565d8813a34d8ac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f4a331b7a22d1b237565d8813a34d8ac-Reviews.html", "metareview": "", "pdf_size": 13202634, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14916089705785350698&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Dynamical Neuroscience, Institute for Collaborative Biotechnologies, UC Santa Barbara, CA, USA; Psychological and Brain Sciences, Institute for Collaborative Biotechnologies, UC Santa Barbara, CA, USA", "aff_domain": "dyns.ucsb.edu;psych.ucsb.edu", "email": "dyns.ucsb.edu;psych.ucsb.edu", "github": "https://github.com/ArturoDeza/Piranhas", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f4a331b7a22d1b237565d8813a34d8ac-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "Institute for Collaborative Biotechnologies", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Catching heuristics are optimal control policies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6991", "id": "6991", "author_site": "Boris Belousov, Gerhard Neumann, Constantin Rothkopf, Jan Peters", "author": "Boris Belousov; Gerhard Neumann; Constantin A Rothkopf; Jan R Peters", "abstract": "Two seemingly contradictory theories attempt to explain how humans move to intercept an airborne ball. One theory posits that humans predict the ball trajectory to optimally plan future actions; the other claims that, instead of performing such complicated computations, humans employ heuristics to reactively choose appropriate actions based on immediate visual feedback. In this paper, we show that interception strategies appearing to be heuristics can be understood as computational solutions to the optimal control problem faced by a ball-catching agent acting under uncertainty. 
Modeling catching as a continuous partially observable Markov decision process and employing stochastic optimal control theory, we discover that the four main heuristics described in the literature are optimal solutions if the catcher has sufficient time to continuously visually track the ball. Specifically, by varying model parameters such as noise, time to ground contact, and perceptual latency, we show that different strategies arise under different circumstances. The catcher's policy switches between generating reactive and predictive behavior based on the ratio of system to observation noise and the ratio between reaction time and task duration. Thus, we provide a rational account of human ball-catching behavior and a unifying explanation for seemingly contradictory theories of target interception on the basis of stochastic optimal control.", "bibtex": "@inproceedings{NIPS2016_43fa7f58,\n author = {Belousov, Boris and Neumann, Gerhard and Rothkopf, Constantin A and Peters, Jan R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Catching heuristics are optimal control policies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/43fa7f58b7eac7ac872209342e62e8f1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/43fa7f58b7eac7ac872209342e62e8f1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/43fa7f58b7eac7ac872209342e62e8f1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/43fa7f58b7eac7ac872209342e62e8f1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/43fa7f58b7eac7ac872209342e62e8f1-Reviews.html", "metareview": "", "pdf_size": 565651, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12128050577800484162&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/43fa7f58b7eac7ac872209342e62e8f1-Abstract.html" }, { "title": "Causal Bandits: Learning Good Interventions via Causal Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7058", "id": "7058", "author_site": "Finnian Lattimore, Tor Lattimore, Mark Reid", "author": "Finnian Lattimore; Tor Lattimore; Mark D. Reid", "abstract": "We study the problem of using causal models to improve the rate at which good interventions can be learned online in a stochastic environment. Our formalism combines multi-arm bandits and causal inference to model a novel type of bandit feedback that is not exploited by existing approaches. We propose a new algorithm that exploits the causal feedback and prove a bound on its simple regret that is strictly better (in all quantities) than algorithms that do not use the additional causal information.", "bibtex": "@inproceedings{NIPS2016_b4288d9c,\n author = {Lattimore, Finnian and Lattimore, Tor and Reid, Mark D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal Bandits: Learning Good Interventions via Causal Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b4288d9c0ec0a1841b3b3728321e7088-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b4288d9c0ec0a1841b3b3728321e7088-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b4288d9c0ec0a1841b3b3728321e7088-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b4288d9c0ec0a1841b3b3728321e7088-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b4288d9c0ec0a1841b3b3728321e7088-Reviews.html", "metareview": "", "pdf_size": 480893, "gs_citation": 201, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9059699409638151358&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Australian National University and Data61/NICTA; Indiana University, Bloomington; Australian National University and Data61/NICTA", "aff_domain": "gmail.com;gmail.com;anu.edu.au", "email": "gmail.com;gmail.com;anu.edu.au", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b4288d9c0ec0a1841b3b3728321e7088-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Australian National University;Indiana University", "aff_unique_dep": ";", "aff_unique_url": "https://www.anu.edu.au;https://www.indiana.edu", "aff_unique_abbr": "ANU;IU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bloomington", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;United States" }, { "title": "Causal meets Submodular: Subset Selection with Directed Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6942", "id": "6942", "author_site": "Yuxun Zhou, Costas J Spanos", "author": "Yuxun Zhou; Costas J Spanos", "abstract": "We study causal subset selection with Directed Information as the measure of prediction causality. Two typical tasks, causal sensor placement and covariate selection, are correspondingly formulated into cardinality-constrained directed information maximizations. To attack these NP-hard problems, we show that the first problem is submodular, while not necessarily monotonic, and that the second one is ``nearly'' submodular. To substantiate the idea of approximate submodularity, we introduce a novel quantity, namely submodularity index (SmI), for general set functions. Moreover, we show that, based on SmI, the greedy algorithm has a performance guarantee for the maximization of possibly non-monotonic and non-submodular functions, justifying its usage for a much broader class of problems. We evaluate the theoretical results with several case studies, and also illustrate the application of the subset selection to causal structure learning.", "bibtex": "@inproceedings{NIPS2016_81ca0262,\n author = {Zhou, Yuxun and Spanos, Costas J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Causal meets Submodular: Subset Selection with Directed Information},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/81ca0262c82e712e50c580c032d99b60-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/81ca0262c82e712e50c580c032d99b60-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/81ca0262c82e712e50c580c032d99b60-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/81ca0262c82e712e50c580c032d99b60-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/81ca0262c82e712e50c580c032d99b60-Reviews.html", "metareview": "", "pdf_size": 552914, "gs_citation": 63, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10447861370294183955&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of EECS, UC Berkeley; Department of EECS, UC Berkeley", "aff_domain": "berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/81ca0262c82e712e50c580c032d99b60-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "CliqueCNN: Deep Unsupervised Exemplar Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7384", "id": "7384", "author_site": "Miguel A Bautista, Artsiom Sanakoyeu, Ekaterina Tikhoncheva, Bjorn Ommer", "author": "Miguel A Bautista; Artsiom Sanakoyeu; Ekaterina Tikhoncheva; Bjorn Ommer", "abstract": "Exemplar learning is a powerful paradigm for discovering visual similarities in an unsupervised manner. In this context, however, the recent breakthrough in deep learning could not yet unfold its full potential. With only a single positive sample, a great imbalance between one positive and many negatives, and unreliable relationships between most samples, training of convolutional neural networks is impaired. Given weak estimates of local distance, we propose a single optimization problem to extract batches of samples with mutually consistent relations. Conflicting relations are distributed over different batches and similar samples are grouped into compact cliques. Learning exemplar similarities is framed as a sequence of clique categorization tasks. The CNN then consolidates transitivity relations within and between cliques and learns a single representation for all samples without the need for labels. The proposed unsupervised approach has shown competitive performance on detailed posture analysis and object classification.", "bibtex": "@inproceedings{NIPS2016_65fc52ed,\n author = {Bautista, Miguel A and Sanakoyeu, Artsiom and Tikhoncheva, Ekaterina and Ommer, Bjorn},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {CliqueCNN: Deep Unsupervised Exemplar Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/65fc52ed8f88c81323a418ca94cec2ed-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/65fc52ed8f88c81323a418ca94cec2ed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/65fc52ed8f88c81323a418ca94cec2ed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/65fc52ed8f88c81323a418ca94cec2ed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/65fc52ed8f88c81323a418ca94cec2ed-Reviews.html", "metareview": "", "pdf_size": 8424507, "gs_citation": 131, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15201710115118769453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Heidelberg Collaboratory for Image Processing; Heidelberg Collaboratory for Image Processing; Heidelberg Collaboratory for Image Processing; Heidelberg Collaboratory for Image Processing", "aff_domain": "iwr.uni-heidelberg.de;iwr.uni-heidelberg.de;iwr.uni-heidelberg.de;iwr.uni-heidelberg.de", "email": "iwr.uni-heidelberg.de;iwr.uni-heidelberg.de;iwr.uni-heidelberg.de;iwr.uni-heidelberg.de", "github": "https://github.com/asanakoy/cliquecnn", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/65fc52ed8f88c81323a418ca94cec2ed-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Heidelberg University", "aff_unique_dep": "Heidelberg Collaboratory for Image Processing", "aff_unique_url": "https://www.kip.uni-heidelberg.de", "aff_unique_abbr": "KIP", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Heidelberg", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "title": "Clustering Signed Networks with the Geometric Mean of Laplacians", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7239", "id": "7239", "author_site": "Pedro Mercado, Francesco Tudisco, Matthias Hein", "author": "Pedro Mercado; Francesco Tudisco; Matthias Hein", "abstract": "Signed networks allow one to model positive and negative relationships. We analyze existing extensions of spectral clustering to signed networks. It turns out that existing approaches do not recover the ground truth clustering in several situations where either the positive or the negative network structures contain no noise. Our analysis shows that these problems arise as existing approaches take some form of arithmetic mean of the Laplacians of the positive and negative part. As a solution, we propose to use the geometric mean of the Laplacians of positive and negative part and show that it outperforms the existing approaches. While the geometric mean of matrices is computationally expensive, we show that eigenvectors of the geometric mean can be computed efficiently, leading to a numerical scheme for sparse matrices which is of independent interest.", "bibtex": "@inproceedings{NIPS2016_7bc1ec1d,\n author = {Mercado, Pedro and Tudisco, Francesco and Hein, Matthias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Clustering Signed Networks with the Geometric Mean of Laplacians},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7bc1ec1d9c3426357e69acd5bf320061-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7bc1ec1d9c3426357e69acd5bf320061-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7bc1ec1d9c3426357e69acd5bf320061-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7bc1ec1d9c3426357e69acd5bf320061-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7bc1ec1d9c3426357e69acd5bf320061-Reviews.html", "metareview": "", "pdf_size": 3184910, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=617824125394328018&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7bc1ec1d9c3426357e69acd5bf320061-Abstract.html" }, { "title": "Clustering with Bregman Divergences: an Asymptotic Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6955", "id": "6955", "author_site": "Chaoyue Liu, Mikhail Belkin", "author": "Chaoyue Liu; Mikhail Belkin", "abstract": "Clustering, in particular $k$-means clustering, is a central topic in data analysis. Clustering with Bregman divergences is a recently proposed generalization of $k$-means clustering which has already been widely used in applications. In this paper we analyze theoretical properties of Bregman clustering when the number of the clusters $k$ is large. We establish quantization rates and describe the limiting distribution of the centers as $k\\to \\infty$, extending well-known results for $k$-means clustering.", "bibtex": "@inproceedings{NIPS2016_c4851e8e,\n author = {Liu, Chaoyue and Belkin, Mikhail},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Clustering with Bregman Divergences: an Asymptotic Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c4851e8e264415c4094e4e85b0baa7cc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c4851e8e264415c4094e4e85b0baa7cc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c4851e8e264415c4094e4e85b0baa7cc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c4851e8e264415c4094e4e85b0baa7cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c4851e8e264415c4094e4e85b0baa7cc-Reviews.html", "metareview": "", "pdf_size": 736498, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6157838940324203769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Computer Science & Engineering, The Ohio State University; Department of Computer Science & Engineering, The Ohio State University", "aff_domain": "osu.edu;cse.ohio-state.edu", "email": "osu.edu;cse.ohio-state.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c4851e8e264415c4094e4e85b0baa7cc-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "Department of Computer Science & Engineering", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Clustering with Same-Cluster Queries", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7219", "id": "7219", "author_site": "Hassan Ashtiani, Shrinu Kushagra, Shai Ben-David", "author": "Hassan Ashtiani; Shrinu Kushagra; Shai Ben-David", "abstract": "We propose a framework for Semi-Supervised Active Clustering (SSAC), where the learner is allowed to interact with a domain expert, asking whether two given instances belong to the same cluster or not. We study the query and computational complexity of clustering in this framework. We consider a setting where the expert conforms to a center-based clustering with a notion of margin. We show that there is a trade-off between computational complexity and query complexity; we prove that for the case of $k$-means clustering (i.e., when the expert conforms to a solution of $k$-means), having access to relatively few such queries allows efficient solutions to otherwise NP-hard problems. In particular, we provide a probabilistic polynomial-time (BPP) algorithm for clustering in this setting that asks $O(k^2\\log k + k\\log n)$ same-cluster queries and runs with time complexity $O(kn\\log n)$ (where $k$ is the number of clusters and $n$ is the number of instances). The success of the algorithm is guaranteed for data satisfying the margin condition under which, without queries, we show that the problem is NP-hard. We also prove a lower bound on the number of queries needed to have a computationally efficient clustering algorithm in this setting.", "bibtex": "@inproceedings{NIPS2016_9597353e,\n author = {Ashtiani, Hassan and Kushagra, Shrinu and Ben-David, Shai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Clustering with Same-Cluster Queries},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9597353e41e6957b5e7aa79214fcb256-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9597353e41e6957b5e7aa79214fcb256-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9597353e41e6957b5e7aa79214fcb256-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9597353e41e6957b5e7aa79214fcb256-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9597353e41e6957b5e7aa79214fcb256-Reviews.html", "metareview": "", "pdf_size": 281478, "gs_citation": 110, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=959421139160240440&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "David R. Cheriton School of Computer Science, University of Waterloo, Waterloo, Ontario, Canada; David R. Cheriton School of Computer Science, University of Waterloo, Waterloo, Ontario, Canada; David R. Cheriton School of Computer Science, University of Waterloo, Waterloo, Ontario, Canada", "aff_domain": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "email": "uwaterloo.ca;uwaterloo.ca;uwaterloo.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9597353e41e6957b5e7aa79214fcb256-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "David R. Cheriton School of Computer Science", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UWaterloo", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Waterloo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Coevolutionary Latent Feature Processes for Continuous-Time User-Item Interactions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7380", "id": "7380", "author_site": "Yichen Wang, Nan Du, Rakshit Trivedi, Le Song", "author": "Yichen Wang; Nan Du; Rakshit Trivedi; Le Song", "abstract": "Matching users to the right items at the right time is a fundamental task in recommendation systems. As users interact with different items over time, users' and items' features may evolve and co-evolve over time. Traditional models based on static latent features or discretizing time into epochs can become ineffective for capturing the fine-grained temporal dynamics in the user-item interactions. We propose a coevolutionary latent feature process model that accurately captures the coevolving nature of users' and items' features. To learn parameters, we design an efficient convex optimization algorithm with novel low-rank space-sharing constraints. Extensive experiments on diverse real-world datasets demonstrate significant improvements in user behavior prediction compared to the state of the art.", "bibtex": "@inproceedings{NIPS2016_53ed35c7,\n author = {Wang, Yichen and Du, Nan and Trivedi, Rakshit and Song, Le},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coevolutionary Latent Feature Processes for Continuous-Time User-Item Interactions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/53ed35c74a2ec275b837374f04396c03-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/53ed35c74a2ec275b837374f04396c03-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/53ed35c74a2ec275b837374f04396c03-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/53ed35c74a2ec275b837374f04396c03-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/53ed35c74a2ec275b837374f04396c03-Reviews.html", "metareview": "", "pdf_size": 1818624, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17440366235013824021&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "College of Computing, Georgia Institute of Technology; Google Research + College of Computing, Georgia Institute of Technology; College of Computing, Georgia Institute of Technology; College of Computing, Georgia Institute of Technology", "aff_domain": "gatech.edu;google.com;gatech.edu;cc.gatech.edu", "email": "gatech.edu;google.com;gatech.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/53ed35c74a2ec275b837374f04396c03-Abstract.html", "aff_unique_index": "0;1+0;0;0", "aff_unique_norm": "Georgia Institute of Technology;Google", "aff_unique_dep": "College of Computing;Google Research", "aff_unique_url": "https://www.gatech.edu;https://research.google", "aff_unique_abbr": "Georgia Tech;Google Research", "aff_campus_unique_index": "0;1+0;0;0", "aff_campus_unique": "Atlanta;Mountain View", "aff_country_unique_index": "0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Coin Betting and Parameter-Free Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6976", "id": "6976", "author_site": "Francesco Orabona, David Pal", "author": "Francesco Orabona; David Pal", "abstract": "In recent years, a number of parameter-free algorithms have been developed for online linear optimization over Hilbert spaces and for learning with expert advice. These algorithms achieve optimal regret bounds that depend on the unknown competitors, without having to tune the learning rates with oracle choices. We present a new intuitive framework to design parameter-free algorithms for both online linear optimization over Hilbert spaces and for learning with expert advice, based on reductions to betting on outcomes of adversarial coins. We instantiate it using a betting algorithm based on the Krichevsky-Trofimov estimator. The resulting algorithms are simple, with no parameters to be tuned, and they improve or match previous results in terms of regret guarantee and per-round complexity.", "bibtex": "@inproceedings{NIPS2016_32072254,\n author = {Orabona, Francesco and Pal, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coin Betting and Parameter-Free Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/320722549d1751cf3f247855f937b982-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/320722549d1751cf3f247855f937b982-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/320722549d1751cf3f247855f937b982-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/320722549d1751cf3f247855f937b982-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/320722549d1751cf3f247855f937b982-Reviews.html", "metareview": "", "pdf_size": 413689, "gs_citation": 202, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2692851645621025368&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Stony Brook University, Stony Brook, NY; Yahoo Research, New York, NY", "aff_domain": "orabona.com;yahoo-inc.com", "email": "orabona.com;yahoo-inc.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/320722549d1751cf3f247855f937b982-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Stony Brook University;Yahoo Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.stonybrook.edu;https://research.yahoo.com", "aff_unique_abbr": "SBU;Yahoo Res.", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stony Brook;New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Collaborative Recurrent Autoencoder: Recommend while Learning to Fill in the Blanks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6924", "id": "6924", "author_site": "Hao Wang, Xingjian SHI, Dit-Yan Yeung", "author": "Hao Wang; Xingjian SHI; Dit-Yan Yeung", "abstract": "Hybrid methods that utilize both content and rating information are commonly used in many recommender systems. However, most of them use either handcrafted features or the bag-of-words representation as a surrogate for the content information but they are neither effective nor natural enough. To address this problem, we develop a collaborative recurrent autoencoder (CRAE) which is a denoising recurrent autoencoder (DRAE) that models the generation of content sequences in the collaborative filtering (CF) setting. The model generalizes recent advances in recurrent deep learning from i.i.d. input to non-i.i.d. (CF-based) input and provides a new denoising scheme along with a novel learnable pooling scheme for the recurrent autoencoder. To do this, we first develop a hierarchical Bayesian model for the DRAE and then generalize it to the CF setting. The synergy between denoising and CF enables CRAE to make accurate recommendations while learning to fill in the blanks in sequences. Experiments on real-world datasets from different domains (CiteULike and Netflix) show that, by jointly modeling the order-aware generation of sequences for the content information and performing CF for the ratings, CRAE is able to significantly outperform the state of the art on both the recommendation task based on ratings and the sequence generation task based on content information.", "bibtex": "@inproceedings{NIPS2016_0266e33d,\n author = {Wang, Hao and SHI, Xingjian and Yeung, Dit-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Collaborative Recurrent Autoencoder: Recommend while Learning to Fill in the Blanks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0266e33d3f546cb5436a10798e657d97-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0266e33d3f546cb5436a10798e657d97-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0266e33d3f546cb5436a10798e657d97-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0266e33d3f546cb5436a10798e657d97-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0266e33d3f546cb5436a10798e657d97-Reviews.html", "metareview": "", "pdf_size": 501455, "gs_citation": 133, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12054499124218516853&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Hong Kong University of Science and Technology; Hong Kong University of Science and Technology; Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;cse.ust.hk;cse.ust.hk", "email": "cse.ust.hk;cse.ust.hk;cse.ust.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0266e33d3f546cb5436a10798e657d97-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Combinatorial Energy Learning for Image Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7119", "id": "7119", "author_site": "Jeremy Maitin-Shepard, Viren Jain, Michal Januszewski, Peter Li, Pieter Abbeel", "author": "Jeremy B Maitin-Shepard; Viren Jain; Michal Januszewski; Peter Li; Pieter Abbeel", "abstract": "We introduce a new machine learning approach for image segmentation that uses a neural network to model the conditional energy of a segmentation given an image. Our approach, combinatorial energy learning for image segmentation (CELIS) places a particular emphasis on modeling the inherent combinatorial nature of dense image segmentation problems. We propose efficient algorithms for learning deep neural networks to model the energy function, and for local optimization of this energy in the space of supervoxel agglomerations. We extensively evaluate our method on a publicly available 3-D microscopy dataset with 25 billion voxels of ground truth data. On an 11 billion voxel test set, we find that our method improves volumetric reconstruction accuracy by more than 20% as compared to two state-of-the-art baseline methods: graph-based segmentation of the output of a 3-D convolutional neural network trained to predict boundaries, as well as a random forest classifier trained to agglomerate supervoxels that were generated by a 3-D convolutional neural network.", "bibtex": "@inproceedings{NIPS2016_31857b44,\n author = {Maitin-Shepard, Jeremy B and Jain, Viren and Januszewski, Michal and Li, Peter and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combinatorial Energy Learning for Image Segmentation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/31857b449c407203749ae32dd0e7d64a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/31857b449c407203749ae32dd0e7d64a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/31857b449c407203749ae32dd0e7d64a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/31857b449c407203749ae32dd0e7d64a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/31857b449c407203749ae32dd0e7d64a-Reviews.html", "metareview": "", "pdf_size": 827197, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15230029953062278465&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "UC Berkeley + Google; Google; Google; Google; UC Berkeley", "aff_domain": "google.com;google.com;google.com;google.com;cs.berkeley.edu", "email": "google.com;google.com;google.com;google.com;cs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/31857b449c407203749ae32dd0e7d64a-Abstract.html", "aff_unique_index": "0+1;1;1;1;0", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0+1;1;1;1;0", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0+0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Combinatorial Multi-Armed Bandit with General Reward Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7046", "id": "7046", "author_site": "Wei Chen, Wei Hu, Fu Li, Jian Li, Yu Liu, Pinyan Lu", "author": "Wei Chen; Wei Hu; Fu Li; Jian Li; Yu Liu; Pinyan Lu", "abstract": "In this paper, we study the stochastic combinatorial multi-armed bandit (CMAB) framework that allows a general nonlinear reward function, whose expected value may not depend only on the means of the input random variables but possibly on the entire distributions of these variables. Our framework enables a much larger class of reward functions such as the $\\max()$ function and nonlinear utility functions. Existing techniques relying on accurate estimations of the means of random variables, such as the upper confidence bound (UCB) technique, do not work directly on these functions. We propose a new algorithm called stochastically dominant confidence bound (SDCB), which estimates the distributions of underlying random variables and their stochastically dominant confidence bounds. We prove that SDCB can achieve $O(\\log T)$ distribution-dependent regret and $\\tilde{O}(\\sqrt{T})$ distribution-independent regret, where $T$ is the time horizon. We apply our results to the $K$-MAX problem and expected utility maximization problems. In particular, for $K$-MAX, we provide the first polynomial-time approximation scheme (PTAS) for its offline problem, and give the first $\\tilde{O}(\\sqrt T)$ bound on the $(1-\\epsilon)$-approximation regret of its online problem, for any $\\epsilon>0$.", "bibtex": "@inproceedings{NIPS2016_aa169b49,\n author = {Chen, Wei and Hu, Wei and Li, Fu and Li, Jian and Liu, Yu and Lu, Pinyan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. 
Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combinatorial Multi-Armed Bandit with General Reward Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/aa169b49b583a2b5af89203c2b78c67c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/aa169b49b583a2b5af89203c2b78c67c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/aa169b49b583a2b5af89203c2b78c67c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/aa169b49b583a2b5af89203c2b78c67c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/aa169b49b583a2b5af89203c2b78c67c-Reviews.html", "metareview": "", "pdf_size": 353642, "gs_citation": 175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17602535211608072588&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Microsoft Research; Princeton University; The University of Texas at Austin; Tsinghua University; Tsinghua University; Shanghai University of Finance and Economics", "aff_domain": "microsoft.com;cs.princeton.edu;gmail.com;gmail.com;gmail.com;mail.shufe.edu.cn", "email": "microsoft.com;cs.princeton.edu;gmail.com;gmail.com;gmail.com;mail.shufe.edu.cn", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/aa169b49b583a2b5af89203c2b78c67c-Abstract.html", "aff_unique_index": "0;1;2;3;3;4", "aff_unique_norm": "Microsoft;Princeton University;University of Texas at Austin;Tsinghua University;Shanghai University of Finance and Economics", "aff_unique_dep": "Microsoft Research;;;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.princeton.edu;https://www.utexas.edu;https://www.tsinghua.edu.cn;http://www.sufe.edu.cn", "aff_unique_abbr": "MSR;Princeton;UT Austin;THU;SUFE", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "United States;China" }, { "title": "Combinatorial semi-bandit with known covariance", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8495", "id": "8495", "author_site": "R\u00e9my Degenne, Vianney Perchet", "author": "R\u00e9my Degenne; Vianney Perchet", "abstract": "The combinatorial stochastic semi-bandit problem is an extension of the classical multi-armed bandit problem in which an algorithm pulls more than one arm at each stage and the rewards of all pulled arms are revealed. One difference with the single arm variant is that the dependency structure of the arms is crucial. Previous works on this setting either used a worst-case approach or imposed independence of the arms. We introduce a way to quantify the dependency structure of the problem and design an algorithm that adapts to it. The algorithm is based on linear regression and the analysis uses techniques from the linear bandit literature. By comparing its performance to a new lower bound, we prove that it is optimal, up to a poly-logarithmic factor in the number of arms pulled.", "bibtex": "@inproceedings{NIPS2016_e816c635,\n author = {Degenne, R\\'{e}my and Perchet, Vianney},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combinatorial semi-bandit with known covariance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e816c635cad85a60fabd6b97b03cbcc9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e816c635cad85a60fabd6b97b03cbcc9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e816c635cad85a60fabd6b97b03cbcc9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e816c635cad85a60fabd6b97b03cbcc9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e816c635cad85a60fabd6b97b03cbcc9-Reviews.html", "metareview": "", "pdf_size": 341401, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17466404205219116632&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "LMPA, Universit\u00e9 Paris Diderot + CMLA, ENS Paris-Saclay; CMLA, ENS Paris-Saclay + CRITEO Research, Paris", "aff_domain": "cmla.ens-cachan.fr;normalesup.org", "email": "cmla.ens-cachan.fr;normalesup.org", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e816c635cad85a60fabd6b97b03cbcc9-Abstract.html", "aff_unique_index": "0+1;1+2", "aff_unique_norm": "Universit\u00e9 Paris Diderot;\u00c9cole Normale Sup\u00e9rieure Paris-Saclay;Criteo", "aff_unique_dep": "LMPA;CMLA;Research", "aff_unique_url": "https://www.univ-paris-diderot.fr;https://www.ens-paris-saclay.fr;https://research.criteo.com", "aff_unique_abbr": ";ENS Paris-Saclay;CRITEO", "aff_campus_unique_index": "1;1+2", "aff_campus_unique": ";Paris-Saclay;Paris", "aff_country_unique_index": "0+0;0+0", "aff_country_unique": "France" }, { "title": "Combining Adversarial Guarantees and Stochastic Fast Rates in Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7109", "id": "7109", "author_site": "Wouter Koolen, Peter Gr\u00fcnwald, Tim van Erven", "author": "Wouter M. Koolen; Peter Gr\u00fcnwald; Tim van Erven", "abstract": "We consider online learning algorithms that guarantee worst-case regret rates in adversarial environments (so they can be deployed safely and will perform robustly), yet adapt optimally to favorable stochastic environments (so they will perform well in a variety of settings of practical importance). We quantify the friendliness of stochastic environments by means of the well-known Bernstein (a.k.a. generalized Tsybakov margin) condition. For two recent algorithms (Squint for the Hedge setting and MetaGrad for online convex optimization) we show that the particular form of their data-dependent individual-sequence regret guarantees implies that they adapt automatically to the Bernstein parameters of the stochastic environment. We prove that these algorithms attain fast rates in their respective settings both in expectation and with high probability.", "bibtex": "@inproceedings{NIPS2016_db116b39,\n author = {Koolen, Wouter M and Gr\\\"{u}nwald, Peter and van Erven, Tim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combining Adversarial Guarantees and Stochastic Fast Rates in Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/db116b39f7a3ac5366079b1d9fe249a5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/db116b39f7a3ac5366079b1d9fe249a5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/db116b39f7a3ac5366079b1d9fe249a5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/db116b39f7a3ac5366079b1d9fe249a5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/db116b39f7a3ac5366079b1d9fe249a5-Reviews.html", "metareview": "", "pdf_size": 401162, "gs_citation": 48, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6309793777089569279&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Centrum Wiskunde & Informatica; CWI and Leiden University; Leiden University", "aff_domain": "cwi.nl;cwi.nl;timvanerven.nl", "email": "cwi.nl;cwi.nl;timvanerven.nl", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/db116b39f7a3ac5366079b1d9fe249a5-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Centrum Wiskunde & Informatica;Leiden University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cwi.nl/;https://www.leidenuniv.nl", "aff_unique_abbr": "CWI;LU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "title": "Combining Fully Convolutional and Recurrent Neural Networks for 3D Biomedical Image Segmentation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7104", "id": "7104", "author_site": "Jianxu Chen, Lin Yang, Yizhe Zhang, Mark Alber, Danny Z Chen", "author": "Jianxu Chen; Lin Yang; Yizhe Zhang; Mark Alber; Danny Z Chen", "abstract": "Segmentation of 3D images is a fundamental problem in biomedical image analysis. Deep learning (DL) approaches have achieved state-of-the-art segmentation performance. To exploit the 3D contexts using neural networks, known DL segmentation methods, including 3D convolution, 2D convolution on the planes orthogonal to 2D slices, and LSTM in multiple directions, all suffer from incompatibility with the highly anisotropic dimensions in common 3D biomedical images. In this paper, we propose a new DL framework for 3D image segmentation, based on a combination of a fully convolutional network (FCN) and a recurrent neural network (RNN), which are responsible for exploiting the intra-slice and inter-slice contexts, respectively. To the best of our knowledge, this is the first DL framework for 3D image segmentation that explicitly leverages 3D image anisotropism. Evaluated on a dataset from the ISBI Neuronal Structure Segmentation Challenge and on in-house image stacks for 3D fungus segmentation, our approach achieves promising results compared to known DL-based 3D segmentation approaches.", "bibtex": "@inproceedings{NIPS2016_4dcf4354,\n author = {Chen, Jianxu and Yang, Lin and Zhang, Yizhe and Alber, Mark and Chen, Danny Z},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Combining Fully Convolutional and Recurrent Neural Networks for 3D Biomedical Image Segmentation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4dcf435435894a4d0972046fc566af76-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4dcf435435894a4d0972046fc566af76-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4dcf435435894a4d0972046fc566af76-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4dcf435435894a4d0972046fc566af76-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4dcf435435894a4d0972046fc566af76-Reviews.html", "metareview": "", "pdf_size": 429294, "gs_citation": 444, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15495980988001652879&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of Notre Dame; University of Notre Dame; University of Notre Dame; University of Notre Dame; University of Notre Dame", "aff_domain": "nd.edu;nd.edu;nd.edu;nd.edu;nd.edu", "email": "nd.edu;nd.edu;nd.edu;nd.edu;nd.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4dcf435435894a4d0972046fc566af76-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Combining Low-Density Separators with CNNs", "author": "Yu-Xiong Wang, Martial Hebert", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7368", "id": "7368" }, { "title": "Communication-Optimal Distributed Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8493", "id": "8493", "author_site": "Jiecao Chen, He Sun, David Woodruff, Qin Zhang", "author": "Jiecao Chen; He Sun; David Woodruff; Qin Zhang", "abstract": "Clustering large datasets is a fundamental problem with a number of applications in machine learning. Data is often collected on different sites and clustering needs to be performed in a distributed manner with low communication. We would like the quality of the clustering in the distributed setting to match that in the centralized setting for which all the data resides on a single site. In this work, we study both graph and geometric clustering problems in two distributed models: (1) a point-to-point model, and (2) a model with a broadcast channel. We give protocols in both models which we show are nearly optimal by proving almost matching communication lower bounds. Our work highlights the surprising power of a broadcast channel for clustering problems; roughly speaking, to cluster n points or n vertices in a graph distributed across s servers, for a worst-case partitioning the communication complexity in a point-to-point model is n*s, while in the broadcast model it is n + s. We implement our algorithms and demonstrate this phenomenon on real life datasets, showing that our algorithms are also very efficient in practice.", "bibtex": "@inproceedings{NIPS2016_7503cfac,\n author = {Chen, Jiecao and Sun, He and Woodruff, David and Zhang, Qin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Communication-Optimal Distributed Clustering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7503cfacd12053d309b6bed5c89de212-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7503cfacd12053d309b6bed5c89de212-Reviews.html", "metareview": "", "pdf_size": 790197, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5648103980334835035&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": "Indiana University; University of Bristol; IBM Research Almaden; Indiana University", "aff_domain": "indiana.edu;bristol.ac.uk;us.ibm.com;indiana.edu", "email": "indiana.edu;bristol.ac.uk;us.ibm.com;indiana.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7503cfacd12053d309b6bed5c89de212-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Indiana University;University of Bristol;IBM", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.indiana.edu;https://www.bristol.ac.uk;https://www.ibm.com/research", "aff_unique_abbr": "IU;Bristol;IBM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Almaden", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Community Detection on Evolving Graphs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6956", "id": "6956", "author_site": "Stefano Leonardi, Aris Anagnostopoulos, Jakub \u0141\u0105cki, Silvio Lattanzi, Mohammad Mahdian", "author": "Aris Anagnostopoulos; Jakub \u0141\u0105cki; Silvio Lattanzi; Stefano Leonardi; Mohammad Mahdian", "abstract": "Clustering is a fundamental step in many information-retrieval and data-mining applications. Detecting clusters in graphs is also a key tool for finding the community structure in social and behavioral networks. In many of these applications, the input graph evolves over time in a continual and decentralized manner, and, to maintain a good clustering, the clustering algorithm needs to repeatedly probe the graph. Furthermore, there are often limitations on the frequency of such probes, either imposed explicitly by the online platform (e.g., in the case of crawling proprietary social networks like twitter) or implicitly because of resource limitations (e.g., in the case of crawling the web). In this paper, we study a model of clustering on evolving graphs that captures this aspect of the problem. Our model is based on the classical stochastic block model, which has been used to assess rigorously the quality of various static clustering methods. In our model, the algorithm is supposed to reconstruct the planted clustering, given the ability to query for small pieces of local information about the graph, at a limited rate. We design and analyze clustering algorithms that work in this model, and show asymptotically tight upper and lower bounds on their accuracy. 
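As a concrete reference for the planted-clustering setup just described, the following is a minimal stochastic block model sampler; the function name, its parameters, and the dense adjacency representation are illustrative assumptions, not code from the paper.

```python
import numpy as np

def sample_sbm(sizes, p_in, p_out, seed=0):
    # Minimal sketch of the planted-partition (stochastic block model) setup:
    # within-cluster edges appear with probability p_in, cross-cluster edges
    # with probability p_out < p_in. Names and the dense representation are
    # assumptions for illustration only.
    rng = np.random.default_rng(seed)
    labels = np.repeat(np.arange(len(sizes)), sizes)   # planted cluster of each vertex
    n = labels.size
    probs = np.where(labels[:, None] == labels[None, :], p_in, p_out)
    upper = np.triu(rng.random((n, n)) < probs, k=1)   # decide each vertex pair once
    adj = (upper | upper.T).astype(int)                # symmetric, no self-loops
    return adj, labels

# e.g. three planted clusters of 50 vertices each
adj, labels = sample_sbm([50, 50, 50], p_in=0.3, p_out=0.05)
```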
Finally, we perform simulations, which demonstrate that our main asymptotic results hold true also in practice.", "bibtex": "@inproceedings{NIPS2016_8698ff92,\n author = {Anagnostopoulos, Aris and \\L \\k{a}cki, Jakub and Lattanzi, Silvio and Leonardi, Stefano and Mahdian, Mohammad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Community Detection on Evolving Graphs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8698ff92115213ab187d31d4ee5da8ea-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8698ff92115213ab187d31d4ee5da8ea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8698ff92115213ab187d31d4ee5da8ea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8698ff92115213ab187d31d4ee5da8ea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8698ff92115213ab187d31d4ee5da8ea-Reviews.html", "metareview": "", "pdf_size": 268224, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1912238281535271789&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Sapienza University of Rome; Sapienza University of Rome; Google; Sapienza University of Rome; Google", "aff_domain": "dis.uniroma1.it;mimuw.edu.pl;google.com;dis.uniroma1.it;google.com", "email": "dis.uniroma1.it;mimuw.edu.pl;google.com;dis.uniroma1.it;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8698ff92115213ab187d31d4ee5da8ea-Abstract.html", "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Sapienza University of Rome;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.uniroma1.it;https://www.google.com", "aff_unique_abbr": "Sapienza;Google", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Rome;Mountain View", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "Italy;United States" }, { "title": "Completely random measures for modelling block-structured sparse networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8501", "id": "8501", "author_site": "Tue Herlau, Mikkel N Schmidt, Morten M\u00f8rup", "author": "Tue Herlau; Mikkel N Schmidt; Morten M\u00f8rup", "abstract": "Statistical methods for network data often parameterize the edge-probability by attributing latent traits such as block structure to the vertices and assume exchangeability in the sense of the Aldous-Hoover representation theorem. These assumptions are however incompatible with traits found in real-world networks such as a power-law degree-distribution. Recently, Caron & Fox (2014) proposed the use of a different notion of exchangeability after Kallenberg (2005) and obtained a network model which permits edge-inhomogeneity, such as a power-law degree-distribution whilst retaining desirable statistical properties. However, this model does not capture latent vertex traits such as block-structure. In this work we re-introduce the use of block-structure for network models obeying Kallenberg\u2019s notion of exchangeability and thereby obtain a collapsed model which both admits the inference of block-structure and edge inhomogeneity. We derive a simple expression for the likelihood and an efficient sampling method. 
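To make the generative construction above concrete, here is a hedged sketch of its multiplicative block-and-sociability idea under a finite truncation: edge counts between vertices i and j are drawn as Poisson with rate w_i * w_j * eta[b_i, b_j]. Every name in the sketch, and the finite-dimensional truncation itself, are assumptions for illustration rather than the paper's full completely-random-measure construction.

```python
import numpy as np

def sample_block_network(w, blocks, eta, seed=0):
    # Vertex sociabilities w (in the paper, atoms of a completely random
    # measure; here just a fixed vector) combine multiplicatively with
    # block-level rates eta, giving a sparse, degree-inhomogeneous multigraph.
    rng = np.random.default_rng(seed)
    rate = np.outer(w, w) * eta[np.ix_(blocks, blocks)]
    upper = rng.poisson(np.triu(rate, k=1))    # draw each vertex pair once
    return upper + upper.T                     # symmetric matrix of edge counts

w = np.random.default_rng(0).gamma(1.0, 1.0, size=60)   # illustrative sociabilities
blocks = np.repeat([0, 1, 2], 20)
eta = np.array([[1.0, 0.1, 0.1], [0.1, 1.0, 0.1], [0.1, 0.1, 1.0]])
counts = sample_block_network(w, blocks, eta)
```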
The obtained model is not significantly more difficult to implement than existing approaches to block-modelling and performs well on real network datasets.", "bibtex": "@inproceedings{NIPS2016_3937230d,\n author = {Herlau, Tue and Schmidt, Mikkel N and M\\o rup, Morten},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Completely random measures for modelling block-structured sparse networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3937230de3c8041e4da6ac3246a888e8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3937230de3c8041e4da6ac3246a888e8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3937230de3c8041e4da6ac3246a888e8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3937230de3c8041e4da6ac3246a888e8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3937230de3c8041e4da6ac3246a888e8-Reviews.html", "metareview": "", "pdf_size": 1925679, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18186432885433872114&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3937230de3c8041e4da6ac3246a888e8-Abstract.html" }, { "title": "Composing graphical models with neural networks for structured representations and fast inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6917", "id": "6917", "author_site": "Matthew Johnson, David Duvenaud, Alex Wiltschko, Ryan Adams, Sandeep R Datta", "author": "Matthew J Johnson; David K. Duvenaud; Alex Wiltschko; Ryan P. Adams; Sandeep R Datta", "abstract": "We propose a general modeling and inference framework that combines the complementary strengths of probabilistic graphical models and deep learning methods. Our model family composes latent graphical models with neural network observation likelihoods. For inference, we use recognition networks to produce local evidence potentials, then combine them with the model distribution using efficient message-passing algorithms. All components are trained simultaneously with a single stochastic variational inference objective. We illustrate this framework by automatically segmenting and categorizing mouse behavior from raw depth video, and demonstrate several other example models.", "bibtex": "@inproceedings{NIPS2016_7d6044e9,\n author = {Johnson, Matthew J and Duvenaud, David K and Wiltschko, Alex and Adams, Ryan P and Datta, Sandeep R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Composing graphical models with neural networks for structured representations and fast inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7d6044e95a16761171b130dcb476a43e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7d6044e95a16761171b130dcb476a43e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7d6044e95a16761171b130dcb476a43e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7d6044e95a16761171b130dcb476a43e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7d6044e95a16761171b130dcb476a43e-Reviews.html", "metareview": "", "pdf_size": 3591109, "gs_citation": 581, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12286587149980999415&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": "Harvard University; Harvard University; Harvard University+Twitter; Harvard Medical School; Harvard University+Twitter", "aff_domain": "seas.harvard.edu;seas.harvard.edu;fas.harvard.edu;hms.harvard.edu;seas.harvard.edu", "email": "seas.harvard.edu;seas.harvard.edu;fas.harvard.edu;hms.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7d6044e95a16761171b130dcb476a43e-Abstract.html", "aff_unique_index": "0;0;0+1;0;0+1", "aff_unique_norm": "Harvard University;Twitter, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://twitter.com", "aff_unique_abbr": "Harvard;Twitter", "aff_campus_unique_index": ";1;", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0+0;0;0+0", "aff_country_unique": "United States" }, { "title": "Computational and Statistical Tradeoffs in Learning to Rank", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7043", "id": "7043", "author_site": "Ashish Khetan, Sewoong Oh", "author": "Ashish Khetan; Sewoong Oh", "abstract": "For massive and heterogeneous modern data sets, it is of fundamental interest to provide guarantees on the accuracy of estimation when computational resources are limited. In the application of learning to rank, we provide a hierarchy of rank-breaking mechanisms ordered by the complexity in thus generated sketch of the data. This allows the number of data points collected to be gracefully traded off against computational resources available, while guaranteeing the desired level of accuracy. Theoretical guarantees on the proposed generalized rank-breaking implicitly provide such trade-offs, which can be explicitly characterized under certain canonical scenarios on the structure of the data.", "bibtex": "@inproceedings{NIPS2016_2421fcb1,\n author = {Khetan, Ashish and Oh, Sewoong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Computational and Statistical Tradeoffs in Learning to Rank},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2421fcb1263b9530df88f7f002e78ea5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2421fcb1263b9530df88f7f002e78ea5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2421fcb1263b9530df88f7f002e78ea5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2421fcb1263b9530df88f7f002e78ea5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2421fcb1263b9530df88f7f002e78ea5-Reviews.html", "metareview": "", "pdf_size": 413400, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11090439126947457475&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of ISE, University of Illinois at Urbana-Champaign; Department of ISE, University of Illinois at Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2421fcb1263b9530df88f7f002e78ea5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "Department of Industrial and Systems Engineering", "aff_unique_url": "https://www.illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Computing and maximizing influence in linear threshold and triggering models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6954", "id": "6954", "author_site": "Justin Khim, Varun Jog, Po-Ling Loh", "author": "Justin T Khim; Varun Jog; Po-Ling Loh", "abstract": "We establish upper and lower bounds for the influence of a set of nodes in certain types of contagion models. We derive two sets of bounds, the first designed for linear threshold models, and the second more broadly applicable to a general class of triggering models, which subsumes the popular independent cascade models, as well. We quantify the gap between our upper and lower bounds in the case of the linear threshold model and illustrate the gains of our upper bounds for independent cascade models in relation to existing results. Importantly, our lower bounds are monotonic and submodular, implying that a greedy algorithm for influence maximization is guaranteed to produce a maximizer within a (1 - 1/e)-factor of the truth. Although the problem of exact influence computation is NP-hard in general, our bounds may be evaluated efficiently. This leads to an attractive, highly scalable algorithm for influence maximization with rigorous theoretical guarantees.", "bibtex": "@inproceedings{NIPS2016_66e8ba82,\n author = {Khim, Justin T and Jog, Varun and Loh, Po-Ling},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Computing and maximizing influence in linear threshold and triggering models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/66e8ba8216a1e152d72653d99a4f03ab-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/66e8ba8216a1e152d72653d99a4f03ab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/66e8ba8216a1e152d72653d99a4f03ab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/66e8ba8216a1e152d72653d99a4f03ab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/66e8ba8216a1e152d72653d99a4f03ab-Reviews.html", "metareview": "", "pdf_size": 344847, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=425773627899850001&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Department of Statistics, The Wharton School, University of Pennsylvania; Electrical & Computer Engineering Department, University of Wisconsin - Madison; Electrical & Computer Engineering Department, University of Wisconsin - Madison", "aff_domain": "wharton.upenn.edu;wisc.edu;ece.wisc.edu", "email": "wharton.upenn.edu;wisc.edu;ece.wisc.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/66e8ba8216a1e152d72653d99a4f03ab-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Pennsylvania;University of Wisconsin - Madison", "aff_unique_dep": "Department of Statistics;Electrical & Computer Engineering Department", "aff_unique_url": "https://www.upenn.edu;https://www.wisc.edu", "aff_unique_abbr": "UPenn;UW-Madison", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Conditional Generative Moment-Matching Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7183", "id": "7183", "author_site": "Yong Ren, Jun Zhu, Jialian Li, Yucen Luo", "author": "Yong Ren; Jun Zhu; Jialian Li; Yucen Luo", "abstract": "Maximum mean discrepancy (MMD) has been successfully applied to learn deep generative models for characterizing a joint distribution of variables via kernel mean embedding. In this paper, we present conditional generative moment-matching networks (CGMMN), which learn a conditional distribution given some input variables based on a conditional maximum mean discrepancy (CMMD) criterion. The learning is performed by stochastic gradient descent with the gradient calculated by back-propagation. We evaluate CGMMN on a wide range of tasks, including predictive modeling, contextual generation, and Bayesian dark knowledge, which distills knowledge from a Bayesian model by learning a relatively small CGMMN student network. Our results demonstrate competitive performance in all the tasks.", "bibtex": "@inproceedings{NIPS2016_0245952e,\n author = {Ren, Yong and Zhu, Jun and Li, Jialian and Luo, Yucen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Conditional Generative Moment-Matching Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0245952ecff55018e2a459517fdb40e3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0245952ecff55018e2a459517fdb40e3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0245952ecff55018e2a459517fdb40e3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0245952ecff55018e2a459517fdb40e3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0245952ecff55018e2a459517fdb40e3-Reviews.html", "metareview": "", "pdf_size": 519802, "gs_citation": 87, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17065677798713071609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0245952ecff55018e2a459517fdb40e3-Abstract.html" }, { "title": "Conditional Image Generation with PixelCNN Decoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7267", "id": "7267", "author_site": "Aaron van den Oord, Nal Kalchbrenner, Lasse Espeholt, koray kavukcuoglu, Oriol Vinyals, Alex Graves", "author": "Aaron van den Oord; Nal Kalchbrenner; Lasse Espeholt; koray kavukcuoglu; Oriol Vinyals; Alex Graves", "abstract": "This work explores conditional image generation with a new image density model based on the PixelCNN architecture. The model can be conditioned on any vector, including descriptive labels or tags, or latent embeddings created by other networks. When conditioned on class labels from the ImageNet database, the model is able to generate diverse, realistic scenes representing distinct animals, objects, landscapes and structures. When conditioned on an embedding produced by a convolutional network given a single image of an unseen face, it generates a variety of new portraits of the same person with different facial expressions, poses and lighting conditions. We also show that conditional PixelCNN can serve as a powerful decoder in an image autoencoder. Additionally, the gated convolutional layers in the proposed model improve the log-likelihood of PixelCNN to match the state-of-the-art performance of PixelRNN on ImageNet, with greatly reduced computational cost.", "bibtex": "@inproceedings{NIPS2016_b1301141,\n author = {van den Oord, Aaron and Kalchbrenner, Nal and Espeholt, Lasse and kavukcuoglu, koray and Vinyals, Oriol and Graves, Alex},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Conditional Image Generation with PixelCNN Decoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b1301141feffabac455e1f90a7de2054-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b1301141feffabac455e1f90a7de2054-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b1301141feffabac455e1f90a7de2054-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b1301141feffabac455e1f90a7de2054-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b1301141feffabac455e1f90a7de2054-Reviews.html", "metareview": "", "pdf_size": 2258558, "gs_citation": 3138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8587297613215686995&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b1301141feffabac455e1f90a7de2054-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Confusions over Time: An Interpretable Bayesian Model to Characterize Trends in Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7341", "id": "7341", "author_site": "Himabindu Lakkaraju, Jure Leskovec", "author": "Himabindu Lakkaraju; Jure Leskovec", "abstract": "We propose Confusions over Time (CoT), a novel generative framework which facilitates a multi-granular analysis of the decision making process. The CoT not only models the confusions or error properties of individual decision makers and their evolution over time, but also allows us to obtain diagnostic insights into the collective decision making process in an interpretable manner. To this end, the CoT models the confusions of the decision makers and their evolution over time via time-dependent confusion matrices. Interpretable insights are obtained by grouping similar decision makers (and items being judged) into clusters and representing each such cluster with an appropriate prototype and identifying the most important features characterizing the cluster via a subspace feature indicator vector. Experimentation with real world data on bail decisions, asthma treatments, and insurance policy approval decisions demonstrates that CoT can accurately model and explain the confusions of decision makers and their evolution over time.", "bibtex": "@inproceedings{NIPS2016_97d01458,\n author = {Lakkaraju, Himabindu and Leskovec, Jure},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Confusions over Time: An Interpretable Bayesian Model to Characterize Trends in Decision Making},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/97d0145823aeb8ed80617be62e08bdcc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/97d0145823aeb8ed80617be62e08bdcc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/97d0145823aeb8ed80617be62e08bdcc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/97d0145823aeb8ed80617be62e08bdcc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/97d0145823aeb8ed80617be62e08bdcc-Reviews.html", "metareview": "", "pdf_size": 611299, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4451240249298854620&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, Stanford University; Department of Computer Science, Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/97d0145823aeb8ed80617be62e08bdcc-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Consistent Estimation of Functions of Data Missing Non-Monotonically and Not at Random", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7404", "id": "7404", "author": "Ilya Shpitser", "abstract": "Missing records are a perennial problem in analysis of complex data of all types, when the target of inference is some function of the full data law. In simple cases, where data is missing at random or completely at random (Rubin, 1976), well-known adjustments exist that result in consistent estimators of target quantities. Assumptions underlying these estimators are generally not realistic in practical missing data problems. Unfortunately, consistent estimators in more complex cases where data is missing not at random, and where no ordering on variables induces monotonicity of missingness status are not known in general, with some notable exceptions (Robins, 1997), (Tchetgen Tchetgen et al, 2016), (Sadinle and Reiter, 2016). In this paper, we propose a general class of consistent estimators for cases where data is missing not at random, and missingness status is non-monotonic. Our estimators, which are generalized inverse probability weighting estimators, make no assumptions on the underlying full data law, but instead place independence restrictions, and certain other fairly mild assumptions, on the distribution of missingness status conditional on the data. The assumptions we place on the distribution of missingness status conditional on the data can be viewed as a version of a conditional Markov random field (MRF) corresponding to a chain graph. Assumptions embedded in our model permit identification from the observed data law, and admit a natural fitting procedure based on the pseudo likelihood approach of (Besag, 1975). 
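For orientation, the textbook special case of inverse probability weighting that the generalized estimators above extend reads as follows; the names and the Hajek-style normalization are assumptions for illustration, and the paper's non-monotone, not-at-random setting is substantially more general.

```python
import numpy as np

def ipw_mean(y, observed, propensity):
    # Reweight each observed outcome by 1 / P(observed | data), so rarely
    # observed units count more; the weighted average is then unbiased for
    # E[Y] when the propensities are correct.
    w = observed / propensity                  # weight 1/p where observed, 0 otherwise
    return np.sum(w * np.where(observed, y, 0.0)) / np.sum(w)

# toy check with known propensities: observation probability rises with y
rng = np.random.default_rng(0)
y = rng.normal(size=10_000)
p = 1.0 / (1.0 + np.exp(-y))
observed = rng.random(10_000) < p
print(y[observed].mean(), ipw_mean(y, observed, p))  # naive mean is biased upward; IPW corrects it
```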
We illustrate our approach with a simple simulation study, and an analysis of risk of premature birth in women in Botswana exposed to highly active anti-retroviral therapy.", "bibtex": "@inproceedings{NIPS2016_7bd28f15,\n author = {Shpitser, Ilya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Consistent Estimation of Functions of Data Missing Non-Monotonically and Not at Random},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7bd28f15a49d5e5848d6ec70e584e625-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7bd28f15a49d5e5848d6ec70e584e625-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7bd28f15a49d5e5848d6ec70e584e625-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7bd28f15a49d5e5848d6ec70e584e625-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7bd28f15a49d5e5848d6ec70e584e625-Reviews.html", "metareview": "", "pdf_size": 335878, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15945000851705026755&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Johns Hopkins University", "aff_domain": "cs.jhu.edu", "email": "cs.jhu.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7bd28f15a49d5e5848d6ec70e584e625-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Consistent Kernel Mean Estimation for Functions of Random Variables", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6996", "id": "6996", "author_site": "Carl-Johann Simon-Gabriel, Adam Scibior, Ilya Tolstikhin, Bernhard Sch\u00f6lkopf", "author": "Carl-Johann Simon-Gabriel; Adam Scibior; Ilya O Tolstikhin; Bernhard Sch\u00f6lkopf", "abstract": "We provide a theoretical foundation for non-parametric estimation of functions of random variables using kernel mean embeddings. We show that for any continuous function f, consistent estimators of the mean embedding of a random variable X lead to consistent estimators of the mean embedding of f(X). For Matern kernels and sufficiently smooth functions we also provide rates of convergence. Our results extend to functions of multiple random variables. If the variables are dependent, we require an estimator of the mean embedding of their joint distribution as a starting point; if they are independent, it is sufficient to have separate estimators of the mean embeddings of their marginal distributions. In either case, our results cover both mean embeddings based on i.i.d. samples as well as \"reduced set\" expansions in terms of dependent expansion points. The latter serves as a justification for using such expansions to limit memory resources when applying the approach as a basis for probabilistic programming.", "bibtex": "@inproceedings{NIPS2016_4b6538a4,\n author = {Simon-Gabriel, Carl-Johann and Scibior, Adam and Tolstikhin, Ilya O and Sch\\\"{o}lkopf, Bernhard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. 
Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Consistent Kernel Mean Estimation for Functions of Random Variables},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4b6538a44a1dfdc2b83477cd76dee98e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4b6538a44a1dfdc2b83477cd76dee98e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4b6538a44a1dfdc2b83477cd76dee98e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4b6538a44a1dfdc2b83477cd76dee98e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4b6538a44a1dfdc2b83477cd76dee98e-Reviews.html", "metareview": "", "pdf_size": 521561, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15605698518429640623&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Empirical Inference, Max Planck Institute for Intelligent Systems; Department of Empirical Inference, Max Planck Institute for Intelligent Systems + Engineering Department, Cambridge University; Department of Empirical Inference, Max Planck Institute for Intelligent Systems; Department of Empirical Inference, Max Planck Institute for Intelligent Systems", "aff_domain": "tuebingen.mpg.de;tuebingen.mpg.de;tuebingen.mpg.de;tuebingen.mpg.de", "email": "tuebingen.mpg.de;tuebingen.mpg.de;tuebingen.mpg.de;tuebingen.mpg.de", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4b6538a44a1dfdc2b83477cd76dee98e-Abstract.html", "aff_unique_index": "0;0+1;0;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;University of Cambridge", "aff_unique_dep": "Department of Empirical Inference;Engineering Department", "aff_unique_url": "https://www.mpituebingen.mpg.de;https://www.cam.ac.uk", "aff_unique_abbr": "MPI-IS;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0+1;0;0", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Constraints Based Convex Belief Propagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7118", "id": "7118", "author_site": "Yaniv Tenzer, Alex Schwing, Kevin Gimpel, Tamir Hazan", "author": "Yaniv Tenzer; Alex Schwing; Kevin Gimpel; Tamir Hazan", "abstract": "Inference in Markov random fields subject to consistency structure is a fundamental problem that arises in many real-life applications. In order to enforce consistency, classical approaches utilize consistency potentials or encode constraints over feasible instances. Unfortunately this comes at the price of a serious computational bottleneck. In this paper we suggest to tackle consistency by incorporating constraints on beliefs. This permits derivation of a closed-form message-passing algorithm which we refer to as the Constraints Based Convex Belief Propagation (CBCBP). Experiments show that CBCBP outperforms the standard approach while being at least an order of magnitude faster.", "bibtex": "@inproceedings{NIPS2016_dc960c46,\n author = {Tenzer, Yaniv and Schwing, Alex and Gimpel, Kevin and Hazan, Tamir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Constraints Based Convex Belief Propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dc960c46c38bd16e953d97cdeefdbc68-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dc960c46c38bd16e953d97cdeefdbc68-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dc960c46c38bd16e953d97cdeefdbc68-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dc960c46c38bd16e953d97cdeefdbc68-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dc960c46c38bd16e953d97cdeefdbc68-Reviews.html", "metareview": "", "pdf_size": 408797, "gs_citation": 0, "gs_cited_by_link": "https://scholar.google.com/scholar?q=related:-Xv6Ru6G61EJ:scholar.google.com/&scioq=Constraints+Based+Convex+Belief+Propagation&hl=en&as_sdt=0,5", "gs_version_total": 4, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dc960c46c38bd16e953d97cdeefdbc68-Abstract.html" }, { "title": "Contextual semibandits via supervised learning oracles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7069", "id": "7069", "author_site": "Akshay Krishnamurthy, Alekh Agarwal, Miro Dudik", "author": "Akshay Krishnamurthy; Alekh Agarwal; Miro Dudik", "abstract": "We study an online decision making problem where on each round a learner chooses a list of items based on some side information, receives a scalar feedback value for each individual item, and a reward that is linearly related to this feedback. These problems, known as contextual semibandits, arise in crowdsourcing, recommendation, and many other domains. This paper reduces contextual semibandits to supervised learning, allowing us to leverage powerful supervised learning methods in this partial-feedback setting. Our first reduction applies when the mapping from feedback to reward is known and leads to a computationally efficient algorithm with near-optimal regret. We show that this algorithm outperforms state-of-the-art approaches on real-world learning-to-rank datasets, demonstrating the advantage of oracle-based algorithms. Our second reduction applies to the previously unstudied setting when the linear mapping from feedback to reward is unknown. Our regret guarantees are superior to prior techniques that ignore the feedback.", "bibtex": "@inproceedings{NIPS2016_e1d5be1c,\n author = {Krishnamurthy, Akshay and Agarwal, Alekh and Dudik, Miro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Contextual semibandits via supervised learning oracles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e1d5be1c7f2f456670de3d53c7b54f4a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e1d5be1c7f2f456670de3d53c7b54f4a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e1d5be1c7f2f456670de3d53c7b54f4a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e1d5be1c7f2f456670de3d53c7b54f4a-Reviews.html", "metareview": "", "pdf_size": 671523, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10483565199074216624&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "College of Information and Computer Sciences, University of Massachusetts, Amherst, MA; Microsoft Research, New York, NY; Microsoft Research, New York, NY", "aff_domain": "cs.umass.edu;microsoft.com;microsoft.com", "email": "cs.umass.edu;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e1d5be1c7f2f456670de3d53c7b54f4a-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Microsoft", "aff_unique_dep": "College of Information and Computer Sciences;Microsoft Research", "aff_unique_url": "https://www.umass.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UMass Amherst;MSR", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Amherst;New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Convergence guarantees for kernel-based quadrature rules in misspecified settings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6959", "id": "6959", "author_site": "Motonobu Kanagawa, Bharath Sriperumbudur, Kenji Fukumizu", "author": "Motonobu Kanagawa; Bharath K. Sriperumbudur; Kenji Fukumizu", "abstract": "Kernel-based quadrature rules are becoming important in machine learning and statistics, as they achieve super-$\\sqrt{n}$ convergence rates in numerical integration, and thus provide alternatives to Monte Carlo integration in challenging settings where integrands are expensive to evaluate or where integrands are high dimensional. These rules are based on the assumption that the integrand has a certain degree of smoothness, which is expressed as that the integrand belongs to a certain reproducing kernel Hilbert space (RKHS). However, this assumption can be violated in practice (e.g., when the integrand is a black box function), and no general theory has been established for the convergence of kernel quadratures in such misspecified settings. Our contribution is in proving that kernel quadratures can be consistent even when the integrand does not belong to the assumed RKHS, i.e., when the integrand is less smooth than assumed. Specifically, we derive convergence rates that depend on the (unknown) lesser smoothness of the integrand, where the degree of smoothness is expressed via powers of RKHSs or via Sobolev spaces.", "bibtex": "@inproceedings{NIPS2016_81c650ca,\n author = {Kanagawa, Motonobu and Sriperumbudur, Bharath K. and Fukumizu, Kenji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convergence guarantees for kernel-based quadrature rules in misspecified settings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/81c650caac28cdefce4de5ddc18befa0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/81c650caac28cdefce4de5ddc18befa0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/81c650caac28cdefce4de5ddc18befa0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/81c650caac28cdefce4de5ddc18befa0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/81c650caac28cdefce4de5ddc18befa0-Reviews.html", "metareview": "", "pdf_size": 144310, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15891832595726058878&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "The Institute of Statistical Mathematics, Tokyo 190-8562, Japan; Department of Statistics, Pennsylvania State University, University Park, PA 16802, USA; The Institute of Statistical Mathematics, Tokyo 190-8562, Japan", "aff_domain": "ism.ac.jp;psu.edu;ism.ac.jp", "email": "ism.ac.jp;psu.edu;ism.ac.jp", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/81c650caac28cdefce4de5ddc18befa0-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Statistical Mathematics;Pennsylvania State University", "aff_unique_dep": ";Department of Statistics", "aff_unique_url": "https://www.ism.ac.jp;https://www.psu.edu", "aff_unique_abbr": "ISM;PSU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Tokyo;University Park", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Japan;United States" }, { "title": "Convex Two-Layer Modeling with Latent Structure", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7080", "id": "7080", "author_site": "Vignesh Ganapathiraman, Xinhua Zhang, Yaoliang Yu, Junfeng Wen", "author": "Vignesh Ganapathiraman; Xinhua Zhang; Yaoliang Yu; Junfeng Wen", "abstract": "Unsupervised learning of structured predictors has been a long standing pursuit in machine learning. Recently a conditional random field auto-encoder has been proposed in a two-layer setting, allowing latent structured representation to be automatically inferred. Aside from being nonconvex, it also requires the demanding inference of normalization. In this paper, we develop a convex relaxation of two-layer conditional model which captures latent structure and estimates model parameters, jointly and optimally. We further expand its applicability by resorting to a weaker form of inference---maximum a-posteriori. The flexibility of the model is demonstrated on two structures based on total unimodularity---graph matching and linear chain. Experimental results confirm the promise of the method.", "bibtex": "@inproceedings{NIPS2016_5487315b,\n author = {Ganapathiraman, Vignesh and Zhang, Xinhua and Yu, Yaoliang and Wen, Junfeng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convex Two-Layer Modeling with Latent Structure},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5487315b1286f907165907aa8fc96619-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5487315b1286f907165907aa8fc96619-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5487315b1286f907165907aa8fc96619-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5487315b1286f907165907aa8fc96619-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5487315b1286f907165907aa8fc96619-Reviews.html", "metareview": "", "pdf_size": 457277, "gs_citation": 5, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4215763604479940955&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5487315b1286f907165907aa8fc96619-Abstract.html" }, { "title": "Convolutional Neural Fabrics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7313", "id": "7313", "author_site": "Shreyas Saxena, Jakob Verbeek", "author": "Shreyas Saxena; Jakob Verbeek", "abstract": "Despite the success of CNNs, selecting the optimal architecture for a given task remains an open problem. Instead of aiming to select a single optimal architecture, we propose a ``fabric'' that embeds an exponentially large number of architectures. The fabric consists of a 3D trellis that connects response maps at different layers, scales, and channels with a sparse homogeneous local connectivity pattern. The only hyper-parameters of a fabric are the number of channels and layers. While individual architectures can be recovered as paths, the fabric can in addition ensemble all embedded architectures together, sharing their weights where their paths overlap. Parameters can be learned using standard methods based on back-propagation, at a cost that scales linearly in the fabric size. We present benchmark results competitive with the state of the art for image classification on MNIST and CIFAR10, and for semantic segmentation on the Part Labels dataset.", "bibtex": "@inproceedings{NIPS2016_07811dc6,\n author = {Saxena, Shreyas and Verbeek, Jakob},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convolutional Neural Fabrics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/07811dc6c422334ce36a09ff5cd6fe71-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/07811dc6c422334ce36a09ff5cd6fe71-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/07811dc6c422334ce36a09ff5cd6fe71-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/07811dc6c422334ce36a09ff5cd6fe71-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/07811dc6c422334ce36a09ff5cd6fe71-Reviews.html", "metareview": "", "pdf_size": 792445, "gs_citation": 267, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13382878732736797647&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "INRIA Grenoble \u2013 Laboratoire Jean Kuntzmann; INRIA Grenoble \u2013 Laboratoire Jean Kuntzmann", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/07811dc6c422334ce36a09ff5cd6fe71-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA Grenoble", "aff_unique_dep": "Laboratoire Jean Kuntzmann", "aff_unique_url": "https://www.inria.fr/grenoble", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Grenoble", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7008", "id": "7008", "author_site": "Micha\u00ebl Defferrard, Xavier Bresson, Pierre Vandergheynst", "author": "Micha\u00ebl Defferrard; Xavier Bresson; Pierre Vandergheynst", "abstract": "In this work, we are interested in generalizing convolutional neural networks (CNNs) from low-dimensional regular grids, where image, video and speech are represented, to high-dimensional irregular domains, such as social networks, brain connectomes or words\u2019 embedding, represented by graphs. We present a formulation of CNNs in the context of spectral graph theory, which provides the necessary mathematical background and efficient numerical schemes to design fast localized convolutional filters on graphs. Importantly, the proposed technique offers the same linear computational complexity and constant learning complexity as classical CNNs, while being universal to any graph structure. Experiments on MNIST and 20NEWS demonstrate the ability of this novel deep learning system to learn local, stationary, and compositional features on graphs.", "bibtex": "@inproceedings{NIPS2016_04df4d43,\n author = {Defferrard, Micha\\\"{e}l and Bresson, Xavier and Vandergheynst, Pierre},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Reviews.html", "metareview": "", "pdf_size": 470470, "gs_citation": 10837, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18205894503371115148&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "EPFL, Lausanne, Switzerland; EPFL, Lausanne, Switzerland; EPFL, Lausanne, Switzerland", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/04df4d434d481c5bb723be1b6df1ee65-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Cooperative Graphical Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7194", "id": "7194", "author_site": "Josip Djolonga, Stefanie Jegelka, Sebastian Tschiatschek, Andreas Krause", "author": "Josip Djolonga; Stefanie Jegelka; Sebastian Tschiatschek; Andreas Krause", "abstract": "We study a rich family of distributions that capture variable interactions significantly more expressive than those representable with low-treewidth or pairwise graphical models, or log-supermodular models. We call these cooperative graphical models. Yet, this family retains structure, which we carefully exploit for efficient inference techniques. Our algorithms combine the polyhedral structure of submodular functions in new ways with variational inference methods to obtain both lower and upper bounds on the partition function. While our fully convex upper bound is minimized as an SDP or via tree-reweighted belief propagation, our lower bound is tightened via belief propagation or mean-field algorithms. The resulting algorithms are easy to implement and, as our experiments show, effectively obtain good bounds and marginals for synthetic and real-world examples.", "bibtex": "@inproceedings{NIPS2016_8f855179,\n author = {Djolonga, Josip and Jegelka, Stefanie and Tschiatschek, Sebastian and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cooperative Graphical Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8f85517967795eeef66c225f7883bdcb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8f85517967795eeef66c225f7883bdcb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8f85517967795eeef66c225f7883bdcb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8f85517967795eeef66c225f7883bdcb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8f85517967795eeef66c225f7883bdcb-Reviews.html", "metareview": "", "pdf_size": 1579233, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4002210786118577328&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Dept. of Computer Science, ETH Z\u00fcrich; CSAIL, MIT; Dept. of Computer Science, ETH Z\u00fcrich; Dept. of Computer Science, ETH Z\u00fcrich", "aff_domain": "inf.ethz.ch;mit.edu;inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;mit.edu;inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8f85517967795eeef66c225f7883bdcb-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "ETH Zurich;Massachusetts Institute of Technology", "aff_unique_dep": "Dept. of Computer Science;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.ethz.ch;https://www.csail.mit.edu", "aff_unique_abbr": "ETHZ;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "title": "Cooperative Inverse Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6933", "id": "6933", "author_site": "Dylan Hadfield-Menell, Stuart J Russell, Pieter Abbeel, Anca Dragan", "author": "Dylan Hadfield-Menell; Stuart Russell; Pieter Abbeel; Anca Dragan", "abstract": "For an autonomous system to be helpful to humans and to pose no unwarranted risks, it needs to align its values with those of the humans in its environment in such a way that its actions contribute to the maximization of value for the humans. We propose a formal definition of the value alignment problem as cooperative inverse reinforcement learning (CIRL). A CIRL problem is a cooperative, partial-information game with two agents, human and robot; both are rewarded according to the human\u2019s reward function, but the robot does not initially know what this is. In contrast to classical IRL, where the human is assumed to act optimally in isolation, optimal CIRL solutions produce behaviors such as active teaching, active learning, and communicative actions that are more effective in achieving value alignment. We show that computing optimal joint policies in CIRL games can be reduced to solving a POMDP, prove that optimality in isolation is suboptimal in CIRL, and derive an approximate CIRL algorithm.", "bibtex": "@inproceedings{NIPS2016_c3395dd4,\n author = {Hadfield-Menell, Dylan and Russell, Stuart J and Abbeel, Pieter and Dragan, Anca},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cooperative Inverse Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c3395dd46c34fa7fd8d729d8cf88b7a8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c3395dd46c34fa7fd8d729d8cf88b7a8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c3395dd46c34fa7fd8d729d8cf88b7a8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c3395dd46c34fa7fd8d729d8cf88b7a8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c3395dd46c34fa7fd8d729d8cf88b7a8-Reviews.html", "metareview": "", "pdf_size": 524447, "gs_citation": 900, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15627132950356078183&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "Electrical Engineering and Computer Science, University of California at Berkeley; Electrical Engineering and Computer Science, University of California at Berkeley; Electrical Engineering and Computer Science, University of California at Berkeley; Electrical Engineering and Computer Science, University of California at Berkeley", "aff_domain": "cs.berkeley.edu;cs.berkeley.edu;cs.berkeley.edu;cs.berkeley.edu", "email": "cs.berkeley.edu;cs.berkeley.edu;cs.berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c3395dd46c34fa7fd8d729d8cf88b7a8-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering and Computer Science", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Coordinate-wise Power Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7381", "id": "7381", "author_site": "Qi Lei, Kai Zhong, Inderjit Dhillon", "author": "Qi Lei; Kai Zhong; Inderjit S Dhillon", "abstract": "In this paper, we propose a coordinate-wise version of the power method from an optimization viewpoint. The vanilla power method simultaneously updates all the coordinates of the iterate, which is essential for its convergence analysis. However, different coordinates converge to the optimal value at different speeds. Our proposed algorithm, which we call coordinate-wise power method, is able to select and update the most important k coordinates in O(kn) time at each iteration, where n is the dimension of the matrix and k <= n is the size of the active set. Inspired by the \"greedy\" nature of our method, we further propose a greedy coordinate descent algorithm applied to a non-convex objective function specialized for symmetric matrices. We provide convergence analyses for both methods. Experimental results on both synthetic and real data show that our methods achieve up to 20 times speedup over the basic power method. Meanwhile, due to their coordinate-wise nature, our methods are very suitable for the important case when data cannot fit into memory. 
Finally, we show how the coordinate-wise mechanism can be applied to other iterative methods that are used in machine learning.", "bibtex": "@inproceedings{NIPS2016_8b406655,\n author = {Lei, Qi and Zhong, Kai and Dhillon, Inderjit S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coordinate-wise Power Method},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8b4066554730ddfaa0266346bdc1b202-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8b4066554730ddfaa0266346bdc1b202-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8b4066554730ddfaa0266346bdc1b202-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8b4066554730ddfaa0266346bdc1b202-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8b4066554730ddfaa0266346bdc1b202-Reviews.html", "metareview": "", "pdf_size": 2580002, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7636756455064189563&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Institute for Computational Engineering & Sciences + Department of Computer Science; Institute for Computational Engineering & Sciences + Department of Computer Science; Institute for Computational Engineering & Sciences + Department of Computer Science", "aff_domain": "ices.utexas.edu;ices.utexas.edu;cs.utexas.edu", "email": "ices.utexas.edu;ices.utexas.edu;cs.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8b4066554730ddfaa0266346bdc1b202-Abstract.html", "aff_unique_index": "0+1;0+1;0+1", "aff_unique_norm": "University of Texas at Austin;Unknown Institution", "aff_unique_dep": "Institute for Computational Engineering & Sciences;Department of Computer Science", "aff_unique_url": "https://ices.utexas.edu;", "aff_unique_abbr": "ICES;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "title": "Coresets for Scalable Bayesian Logistic Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7178", "id": "7178", "author_site": "Jonathan Huggins, Trevor Campbell, Tamara Broderick", "author": "Jonathan Huggins; Trevor Campbell; Tamara Broderick", "abstract": "The use of Bayesian methods in large-scale data settings is attractive because of the rich hierarchical models, uncertainty quantification, and prior specification they provide. Standard Bayesian inference algorithms are computationally expensive, however, making their direct application to large datasets difficult or infeasible. Recent work on scaling Bayesian inference has focused on modifying the underlying algorithms to, for example, use only a random data subsample at each iteration. We leverage the insight that data is often redundant to instead obtain a weighted subset of the data (called a coreset) that is much smaller than the original dataset. We can then use this small coreset in any number of existing posterior inference algorithms without modification. In this paper, we develop an efficient coreset construction algorithm for Bayesian logistic regression models. 
We provide theoretical guarantees on the size and approximation quality of the coreset -- both for fixed, known datasets, and in expectation for a wide class of data generative models. Crucially, the proposed approach also permits efficient construction of the coreset in both streaming and parallel settings, with minimal additional effort. We demonstrate the efficacy of our approach on a number of synthetic and real-world datasets, and find that, in practice, the size of the coreset is independent of the original dataset size. Furthermore, constructing the coreset takes a negligible amount of time compared to that required to run MCMC on it.", "bibtex": "@inproceedings{NIPS2016_2b0f658c,\n author = {Huggins, Jonathan and Campbell, Trevor and Broderick, Tamara},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coresets for Scalable Bayesian Logistic Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2b0f658cbffd284984fb11d90254081f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2b0f658cbffd284984fb11d90254081f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2b0f658cbffd284984fb11d90254081f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2b0f658cbffd284984fb11d90254081f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2b0f658cbffd284984fb11d90254081f-Reviews.html", "metareview": "", "pdf_size": 1524091, "gs_citation": 278, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5925279736068954288&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Computer Science and Artificial Intelligence Laboratory, MIT; Computer Science and Artificial Intelligence Laboratory, MIT; Computer Science and Artificial Intelligence Laboratory, MIT", "aff_domain": "mit.edu;mit.edu;csail.mit.edu", "email": "mit.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2b0f658cbffd284984fb11d90254081f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Correlated-PCA: Principal Components' Analysis when Data and Noise are Correlated", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7195", "id": "7195", "author_site": "Namrata Vaswani, Han Guo", "author": "Namrata Vaswani; Han Guo", "abstract": "Given a matrix of observed data, Principal Components Analysis (PCA) computes a small number of orthogonal directions that contain most of its variability. Provably accurate solutions for PCA have been in use for a long time. However, to the best of our knowledge, all existing theoretical guarantees for it assume that the data and the corrupting noise are mutually independent, or at least uncorrelated. This is often valid in practice, but not always. In this paper, we study the PCA problem in the setting where the data and noise can be correlated. 
Such noise is often also referred to as \"data-dependent noise\". We obtain a correctness result for the standard eigenvalue decomposition (EVD) based solution to PCA under simple assumptions on the data-noise correlation. We also develop and analyze a generalization of EVD, cluster-EVD, that improves upon EVD in certain regimes.", "bibtex": "@inproceedings{NIPS2016_ef4e3b77,\n author = {Vaswani, Namrata and Guo, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Correlated-PCA: Principal Components\\textquotesingle Analysis when Data and Noise are Correlated},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ef4e3b775c934dada217712d76f3d51f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ef4e3b775c934dada217712d76f3d51f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ef4e3b775c934dada217712d76f3d51f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ef4e3b775c934dada217712d76f3d51f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ef4e3b775c934dada217712d76f3d51f-Reviews.html", "metareview": "", "pdf_size": 325964, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5625464750354736061&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Iowa State University, Ames, IA, USA; Iowa State University, Ames, IA, USA", "aff_domain": "iastate.edu;iastate.edu", "email": "iastate.edu;iastate.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ef4e3b775c934dada217712d76f3d51f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ames", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Coupled Generative Adversarial Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6998", "id": "6998", "author_site": "Ming-Yu Liu, Oncel Tuzel", "author": "Ming-Yu Liu; Oncel Tuzel", "abstract": "We propose the coupled generative adversarial nets (CoGAN) framework for generating pairs of corresponding images in two different domains. The framework consists of a pair of generative adversarial nets, each responsible for generating images in one domain. We show that by enforcing a simple weight-sharing constraint, the CoGAN learns to generate pairs of corresponding images without existence of any pairs of corresponding images in the two domains in the training set. In other words, the CoGAN learns a joint distribution of images in the two domains from images drawn separately from the marginal distributions of the individual domains. This is in contrast to the existing multi-modal generative models, which require corresponding images for training. We apply the CoGAN to several pair image generation tasks. For each task, the CoGAN learns to generate convincing pairs of corresponding images. 
We further demonstrate the applications of the CoGAN framework for the domain adaptation and cross-domain image generation tasks.", "bibtex": "@inproceedings{NIPS2016_502e4a16,\n author = {Liu, Ming-Yu and Tuzel, Oncel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Coupled Generative Adversarial Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/502e4a16930e414107ee22b6198c578f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/502e4a16930e414107ee22b6198c578f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/502e4a16930e414107ee22b6198c578f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/502e4a16930e414107ee22b6198c578f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/502e4a16930e414107ee22b6198c578f-Reviews.html", "metareview": "", "pdf_size": 948915, "gs_citation": 2142, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9131569704953326400&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Mitsubishi Electric Research Labs (MERL); Mitsubishi Electric Research Labs (MERL)", "aff_domain": "merl.com;merl.com", "email": "merl.com;merl.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/502e4a16930e414107ee22b6198c578f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Mitsubishi Electric Research Labs", "aff_unique_dep": "", "aff_unique_url": "https://www.merl.com", "aff_unique_abbr": "MERL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Crowdsourced Clustering: Querying Edges vs Triangles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6948", "id": "6948", "author_site": "Ramya Korlakai Vinayak, Babak Hassibi", "author": "Ramya Korlakai Vinayak; Babak Hassibi", "abstract": "We consider the task of clustering items using answers from non-expert crowd workers. In such cases, the workers are often not able to label the items directly, however, it is reasonable to assume that they can compare items and judge whether they are similar or not. An important question is what queries to make, and we compare two types: random edge queries, where a pair of items is revealed, and random triangles, where a triple is. Since it is far too expensive to query all possible edges and/or triangles, we need to work with partial observations subject to a fixed query budget constraint. When a generative model for the data is available (and we consider a few of these) we determine the cost of a query by its entropy; when such models do not exist we use the average response time per query of the workers as a surrogate for the cost. In addition to theoretical justification, through several simulations and experiments on two real data sets on Amazon Mechanical Turk, we empirically demonstrate that, for a fixed budget, triangle queries uniformly outperform edge queries. Even though, in contrast to edge queries, triangle queries reveal dependent edges, they provide more reliable edges and, for a fixed budget, many more of them. 
We also provide a sufficient condition on the number of observations, edge densities inside and outside the clusters and the minimum cluster size required for the exact recovery of the true adjacency matrix via triangle queries using a convex optimization-based clustering algorithm.", "bibtex": "@inproceedings{NIPS2016_82f2b308,\n author = {Korlakai Vinayak, Ramya and Hassibi, Babak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Crowdsourced Clustering: Querying Edges vs Triangles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/82f2b308c3b01637c607ce05f52a2fed-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/82f2b308c3b01637c607ce05f52a2fed-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/82f2b308c3b01637c607ce05f52a2fed-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/82f2b308c3b01637c607ce05f52a2fed-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/82f2b308c3b01637c607ce05f52a2fed-Reviews.html", "metareview": "", "pdf_size": 974204, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7767121027670779269&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Electrical Engineering, Caltech, Pasadena; Department of Electrical Engineering, Caltech, Pasadena", "aff_domain": "caltech.edu;systems.caltech.edu", "email": "caltech.edu;systems.caltech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/82f2b308c3b01637c607ce05f52a2fed-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Cyclades: Conflict-free Asynchronous Machine Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7326", "id": "7326", "author_site": "Xinghao Pan, Maximilian Lam, Stephen Tu, Dimitris Papailiopoulos, Ce Zhang, Michael Jordan, Kannan Ramchandran, Christopher R\u00e9, Benjamin Recht", "author": "Xinghao Pan; Maximilian Lam; Stephen Tu; Dimitris Papailiopoulos; Ce Zhang; Michael I Jordan; Kannan Ramchandran; Christopher R\u00e9", "abstract": "We present Cyclades, a general framework for parallelizing stochastic optimization algorithms in a shared memory setting. Cyclades is asynchronous during model updates, and requires no memory locking mechanisms, similar to Hogwild!-type algorithms. Unlike Hogwild!, Cyclades introduces no conflicts during parallel execution, and offers a black-box analysis for provable speedups across a large family of algorithms. 
Due to its inherent cache locality and conflict-free nature, our multi-core implementation of Cyclades consistently outperforms Hogwild!-type algorithms on sufficiently sparse datasets, leading to up to 40% speedup gains compared to Hogwild!, and up to 5x gains over asynchronous implementations of variance reduction algorithms.", "bibtex": "@inproceedings{NIPS2016_28e209b6,\n author = {Pan, Xinghao and Lam, Maximilian and Tu, Stephen and Papailiopoulos, Dimitris and Zhang, Ce and Jordan, Michael I and Ramchandran, Kannan and R\\'{e}, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Cyclades: Conflict-free Asynchronous Machine Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/28e209b61a52482a0ae1cb9f5959c792-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/28e209b61a52482a0ae1cb9f5959c792-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/28e209b61a52482a0ae1cb9f5959c792-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/28e209b61a52482a0ae1cb9f5959c792-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/28e209b61a52482a0ae1cb9f5959c792-Reviews.html", "metareview": "", "pdf_size": 1856290, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6456186422628009213&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA; Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA; Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA; Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA; Department of Computer Science, Stanford University, Palo Alto, CA + Department of Statistics, UC Berkeley, Berkeley, CA; Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA + Department of Statistics, UC Berkeley, Berkeley, CA; Department of Computer Science, Stanford University, Palo Alto, CA; Department of Electrical Engineering and Computer Science, UC Berkeley, Berkeley, CA + Department of Statistics, UC Berkeley, Berkeley, CA", "aff_domain": ";;;;;;;", "email": ";;;;;;;", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/28e209b61a52482a0ae1cb9f5959c792-Abstract.html", "aff_unique_index": "0;0;0;0;1+0;0+0;1;0+0", "aff_unique_norm": "University of California, Berkeley;Stanford University", "aff_unique_dep": "Department of Electrical Engineering and Computer Science;Department of Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://www.stanford.edu", "aff_unique_abbr": "UC Berkeley;Stanford", "aff_campus_unique_index": "0;0;0;0;1+0;0+0;1;0+0", "aff_campus_unique": "Berkeley;Palo Alto", "aff_country_unique_index": "0;0;0;0;0+0;0+0;0;0+0", "aff_country_unique": "United States" }, { "title": "DECOrrelated feature space partitioning for distributed sparse regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6997", "id": "6997", "author_site": "Xiangyu Wang, David B Dunson, Chenlei Leng", "author": "Xiangyu Wang; David B Dunson; Chenlei Leng", "abstract": "Fitting statistical models is computationally challenging when 
the sample size or the dimension of the dataset is huge. An attractive approach for down-scaling the problem size is to first partition the dataset into subsets and then fit using distributed algorithms. The dataset can be partitioned either horizontally (in the sample space) or vertically (in the feature space). While the majority of the literature focuses on sample space partitioning, feature space partitioning is more effective when p >> n. Existing methods for partitioning features, however, are either vulnerable to high correlations or inefficient in reducing the model dimension. In this paper, we solve these problems through a new embarrassingly parallel framework named DECO for distributed variable selection and parameter estimation. In DECO, variables are first partitioned and allocated to m distributed workers. The decorrelated subset data within each worker are then fitted via any algorithm designed for high-dimensional problems. We show that by incorporating the decorrelation step, DECO can achieve consistent variable selection and parameter estimation on each subset with (almost) no assumptions. In addition, the convergence rate is nearly minimax optimal for both sparse and weakly sparse models and does NOT depend on the partition number m. Extensive numerical experiments are provided to illustrate the performance of the new framework.", "bibtex": "@inproceedings{NIPS2016_cfee3986,\n author = {Wang, Xiangyu and Dunson, David B and Leng, Chenlei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DECOrrelated feature space partitioning for distributed sparse regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cfee398643cbc3dc5eefc89334cacdc1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cfee398643cbc3dc5eefc89334cacdc1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/cfee398643cbc3dc5eefc89334cacdc1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cfee398643cbc3dc5eefc89334cacdc1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cfee398643cbc3dc5eefc89334cacdc1-Reviews.html", "metareview": "", "pdf_size": 355258, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8834294431549675314&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Dept. of Statistical Science, Duke University; Dept. of Statistical Science, Duke University; Dept. of Statistics, University of Warwick", "aff_domain": "gmail.com;stat.duke.edu;warwick.ac.uk", "email": "gmail.com;stat.duke.edu;warwick.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cfee398643cbc3dc5eefc89334cacdc1-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Duke University;University of Warwick", "aff_unique_dep": "Dept. of Statistical Science;Dept. 
of Statistics", "aff_unique_url": "https://www.duke.edu;https://warwick.ac.uk", "aff_unique_abbr": "Duke;Warwick", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "title": "DISCO Nets : DISsimilarity COefficients Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8502", "id": "8502", "author_site": "Diane Bouchacourt, Pawan K Mudigonda, Sebastian Nowozin", "author": "Diane Bouchacourt; Pawan K Mudigonda; Sebastian Nowozin", "abstract": "We present a new type of probabilistic model which we call DISsimilarity COefficient Networks (DISCO Nets). DISCO Nets allow us to efficiently sample from a posterior distribution parametrised by a neural network. During training, DISCO Nets are learned by minimising the dissimilarity coefficient between the true distribution and the estimated distribution. This allows us to tailor the training to the loss related to the task at hand. We empirically show that (i) by modeling uncertainty on the output value, DISCO Nets outperform equivalent non-probabilistic predictive networks and (ii) DISCO Nets accurately model the uncertainty of the output, outperforming existing probabilistic models based on deep neural networks.", "bibtex": "@inproceedings{NIPS2016_c0e190d8,\n author = {Bouchacourt, Diane and Mudigonda, Pawan K and Nowozin, Sebastian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DISCO Nets : DISsimilarity COefficients Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c0e190d8267e36708f955d7ab048990d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c0e190d8267e36708f955d7ab048990d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c0e190d8267e36708f955d7ab048990d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c0e190d8267e36708f955d7ab048990d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c0e190d8267e36708f955d7ab048990d-Reviews.html", "metareview": "", "pdf_size": 3192366, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14555124152072309268&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "University of Oxford; University of Oxford; Microsoft Research Cambridge", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;microsoft.com", "email": "robots.ox.ac.uk;robots.ox.ac.uk;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c0e190d8267e36708f955d7ab048990d-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Oxford;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.ox.ac.uk;https://www.microsoft.com/en-us/research/group/microsoft-research-cambridge", "aff_unique_abbr": "Oxford;MSR Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Data Poisoning Attacks on Factorization-Based Collaborative Filtering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7244", "id": "7244", "author_site": "Bo Li, Yining Wang, Aarti Singh, Yevgeniy Vorobeychik", "author": "Bo Li; 
Yining Wang; Aarti Singh; Yevgeniy Vorobeychik", "abstract": "Recommendation and collaborative filtering systems are important in modern information and e-commerce applications. As these systems are becoming increasingly popular in industry, their outputs could affect business decision making, introducing incentives for an adversarial party to compromise the availability or integrity of such systems. We introduce a data poisoning attack on collaborative filtering systems. We demonstrate how a powerful attacker with full knowledge of the learner can generate malicious data so as to maximize his/her malicious objectives, while at the same time mimicking normal user behaviors to avoid being detected. While the complete knowledge assumption seems extreme, it enables a robust assessment of the vulnerability of collaborative filtering schemes to highly motivated attacks. We present efficient solutions for two popular factorization-based collaborative filtering algorithms: the alternating minimization formulation and the nuclear norm minimization method. Finally, we test the effectiveness of our proposed algorithms on real-world data and discuss potential defensive strategies.", "bibtex": "@inproceedings{NIPS2016_83fa5a43,\n author = {Li, Bo and Wang, Yining and Singh, Aarti and Vorobeychik, Yevgeniy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data Poisoning Attacks on Factorization-Based Collaborative Filtering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/83fa5a432ae55c253d0e60dbfa716723-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/83fa5a432ae55c253d0e60dbfa716723-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/83fa5a432ae55c253d0e60dbfa716723-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/83fa5a432ae55c253d0e60dbfa716723-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/83fa5a432ae55c253d0e60dbfa716723-Reviews.html", "metareview": "", "pdf_size": 787284, "gs_citation": 444, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14272718160753446643&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Vanderbilt University; Carnegie Mellon University; Carnegie Mellon University; Vanderbilt University", "aff_domain": "vanderbilt.edu;gmail.com;cs.cmu.edu;vanderbilt.edu", "email": "vanderbilt.edu;gmail.com;cs.cmu.edu;vanderbilt.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/83fa5a432ae55c253d0e60dbfa716723-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Vanderbilt University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.vanderbilt.edu;https://www.cmu.edu", "aff_unique_abbr": "Vanderbilt;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data Programming: Creating Large Training Sets, Quickly", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7031", "id": "7031", "author_site": "Alexander Ratner, Christopher M De Sa, Sen Wu, Daniel Selsam, Christopher R\u00e9", "author": "Alexander J Ratner; Christopher M De Sa; Sen Wu; Daniel Selsam; Christopher R\u00e9", "abstract": "Large labeled training 
sets are the critical building blocks of supervised learning methods and are key enablers of deep learning techniques. For some applications, creating labeled training sets is the most time-consuming and expensive part of applying machine learning. We therefore propose a paradigm for the programmatic creation of training sets called data programming in which users provide a set of labeling functions, which are programs that heuristically label subsets of the data, but that are noisy and may conflict. By viewing these labeling functions as implicitly describing a generative model for this noise, we show that we can recover the parameters of this model to \"denoise\" the generated training set, and establish theoretically that we can recover the parameters of these generative models in a handful of settings. We then show how to modify a discriminative loss function to make it noise-aware, and demonstrate our method over a range of discriminative models including logistic regression and LSTMs. Experimentally, on the 2014 TAC-KBP Slot Filling challenge, we show that data programming would have led to a new winning score, and also show that applying data programming to an LSTM model leads to a TAC-KBP score almost 6 F1 points higher than a state-of-the-art LSTM baseline (and into second place in the competition). Additionally, in initial user studies we observed that data programming may be an easier way for non-experts to create machine learning models when training data is limited or unavailable.", "bibtex": "@inproceedings{NIPS2016_6709e8d6,\n author = {Ratner, Alexander J and De Sa, Christopher M and Wu, Sen and Selsam, Daniel and R\\'{e}, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data Programming: Creating Large Training Sets, Quickly},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6709e8d64a5f47269ed5cea9f625f7ab-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6709e8d64a5f47269ed5cea9f625f7ab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6709e8d64a5f47269ed5cea9f625f7ab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6709e8d64a5f47269ed5cea9f625f7ab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6709e8d64a5f47269ed5cea9f625f7ab-Reviews.html", "metareview": "", "pdf_size": 157889, "gs_citation": 910, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14359672499909857504&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Stanford University; Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6709e8d64a5f47269ed5cea9f625f7ab-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Data driven estimation of Laplace-Beltrami operator", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7060", "id": "7060", "author_site": "Frederic Chazal, Ilaria Giulini, Bertrand Michel", "author": "Frederic Chazal; Ilaria Giulini; Bertrand Michel", "abstract": "Approximations of Laplace-Beltrami operators on manifolds through graph Laplacians have become popular tools in data analysis and machine learning. These discretized operators usually depend on bandwidth parameters whose tuning remains a theoretical and practical problem. In this paper, we address this problem for the unnormalized graph Laplacian by establishing an oracle inequality that opens the door to a well-founded data-driven procedure for the bandwidth selection. Our approach relies on recent results by Lacour and Massart (2015) on the so-called Lepski's method.", "bibtex": "@inproceedings{NIPS2016_dd055f53,\n author = {Chazal, Frederic and Giulini, Ilaria and Michel, Bertrand},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Data driven estimation of Laplace-Beltrami operator},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dd055f53a45702fe05e449c30ac80df9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dd055f53a45702fe05e449c30ac80df9-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dd055f53a45702fe05e449c30ac80df9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dd055f53a45702fe05e449c30ac80df9-Reviews.html", "metareview": "", "pdf_size": 392115, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4786913906312376024&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Inria Saclay, Palaiseau France; Inria Saclay, Palaiseau France; Ecole Centrale de Nantes, Laboratoire de Math\u00e9matiques Jean Leray (UMR 6629 CNRS), Nantes France", "aff_domain": "inria.fr;me.com;ec-nantes.fr", "email": "inria.fr;me.com;ec-nantes.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dd055f53a45702fe05e449c30ac80df9-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "INRIA;Ecole Centrale de Nantes", "aff_unique_dep": ";Laboratoire de Math\u00e9matiques Jean Leray (UMR 6629 CNRS)", "aff_unique_url": "https://www.inria.fr;https://www.ecn.fr", "aff_unique_abbr": "Inria;ECN", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Saclay;Nantes", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Deconvolving Feedback Loops in Recommender Systems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7151", "id": "7151", "author_site": "Ayan Sinha, David Gleich, Karthik Ramani", "author": "Ayan Sinha; David F Gleich; Karthik Ramani", "abstract": "Collaborative filtering is a popular technique to infer users' preferences on new content based on the collective information of all users preferences. Recommender systems then use this information to make personalized suggestions to users. When users accept these recommendations it creates a feedback loop in the recommender system, and these loops iteratively influence the collaborative filtering algorithm's predictions over time. We investigate whether it is possible to identify items affected by these feedback loops. We state sufficient assumptions to deconvolve the feedback loops while keeping the inverse solution tractable. We furthermore develop a metric to unravel the recommender system's influence on the entire user-item rating matrix. We use this metric on synthetic and real-world datasets to (1) identify the extent to which the recommender system affects the final rating matrix, (2) rank frequently recommended items, and (3) distinguish whether a user's rated item was recommended or an intrinsic preference. Our results indicate that it is possible to recover the ratings matrix of intrinsic user preferences using a single snapshot of the ratings matrix without any temporal information.", "bibtex": "@inproceedings{NIPS2016_962e56a8,\n author = {Sinha, Ayan and Gleich, David F and Ramani, Karthik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deconvolving Feedback Loops in Recommender Systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/962e56a8a0b0420d87272a682bfd1e53-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/962e56a8a0b0420d87272a682bfd1e53-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/962e56a8a0b0420d87272a682bfd1e53-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/962e56a8a0b0420d87272a682bfd1e53-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/962e56a8a0b0420d87272a682bfd1e53-Reviews.html", "metareview": "", "pdf_size": 1565267, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4024090762537583083&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Purdue University; Purdue University; Purdue University", "aff_domain": "mit.edu;purdue.edu;purdue.edu", "email": "mit.edu;purdue.edu;purdue.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/962e56a8a0b0420d87272a682bfd1e53-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Deep ADMM-Net for Compressive Sensing MRI", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7366", "id": "7366", "author_site": "Yan Yang, Jian Sun, Huibin Li, Zongben Xu", "author": "yan yang; Jian Sun; Huibin Li; Zongben Xu", "abstract": "Compressive Sensing (CS) is an effective approach for fast Magnetic Resonance Imaging (MRI). It aims at reconstructing an MR image from a small amount of under-sampled data in k-space, and accelerating data acquisition in MRI. To improve the current MRI system in reconstruction accuracy and computational speed, in this paper, we propose a novel deep architecture, dubbed ADMM-Net. ADMM-Net is defined over a data flow graph, which is derived from the iterative procedures in the Alternating Direction Method of Multipliers (ADMM) algorithm for optimizing a CS-based MRI model. In the training phase, all parameters of the net, e.g., image transforms, shrinkage functions, etc., are discriminatively trained end-to-end using the L-BFGS algorithm. In the testing phase, it has computational overhead similar to ADMM but uses optimized parameters learned from the training data for the CS-based reconstruction task. Experiments on MRI image reconstruction under different sampling ratios in k-space demonstrate that it significantly improves the baseline ADMM algorithm and achieves high reconstruction accuracy with fast computational speed.", "bibtex": "@inproceedings{NIPS2016_1679091c,\n author = {yang, yan and Sun, Jian and Li, Huibin and Xu, Zongben},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep ADMM-Net for Compressive Sensing MRI},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1679091c5a880faf6fb5e6087eb1b2dc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1679091c5a880faf6fb5e6087eb1b2dc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1679091c5a880faf6fb5e6087eb1b2dc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1679091c5a880faf6fb5e6087eb1b2dc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1679091c5a880faf6fb5e6087eb1b2dc-Reviews.html", "metareview": "", "pdf_size": 1294192, "gs_citation": 1382, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17710018232730379071&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 5, "aff": "Xi\u2019an Jiaotong University; Xi\u2019an Jiaotong University; Xi\u2019an Jiaotong University; Xi\u2019an Jiaotong University", "aff_domain": "stu.xjtu.edu.cn;mail.xjtu.edu.cn;mail.xjtu.edu.cn;mail.xjtu.edu.cn", "email": "stu.xjtu.edu.cn;mail.xjtu.edu.cn;mail.xjtu.edu.cn;mail.xjtu.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1679091c5a880faf6fb5e6087eb1b2dc-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Xi'an Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.xjtu.edu.cn", "aff_unique_abbr": "XJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Alternative Neural Network: Exploring Contexts as Early as Possible for Action Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7002", "id": "7002", "author_site": "Jinzhuo Wang, Wenmin Wang, xiongtao Chen, Ronggang Wang, Wen Gao", "author": "Jinzhuo Wang; Wenmin Wang; xiongtao Chen; Ronggang Wang; Wen Gao", "abstract": "Contexts are crucial for action recognition in video. Current methods often mine contexts after extracting hierarchical local features and focus on their high-order encodings. This paper instead explores contexts as early as possible and leverages their evolutions for action recognition. In particular, we introduce a novel architecture called deep alternative neural network (DANN) stacking alternative layers. Each alternative layer consists of a volumetric convolutional layer followed by a recurrent layer. The former acts as local feature learner while the latter is used to collect contexts. Compared with feed-forward neural networks, DANN learns contexts of local features from the very beginning. This setting helps to preserve hierarchical context evolutions which we show are essential to recognize similar actions. Besides, we present an adaptive method to determine the temporal size for network input based on optical flow energy, and develop a volumetric pyramid pooling layer to deal with input clips of arbitrary sizes. We demonstrate the advantages of DANN on two benchmarks HMDB51 and UCF101 and report competitive or superior results to the state-of-the-art.", "bibtex": "@inproceedings{NIPS2016_6ea2ef73,\n author = {Wang, Jinzhuo and Wang, Wenmin and Chen, xiongtao and Wang, Ronggang and Gao, Wen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Alternative Neural Network: Exploring Contexts as Early as Possible for Action Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6ea2ef7311b482724a9b7b0bc0dd85c6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6ea2ef7311b482724a9b7b0bc0dd85c6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6ea2ef7311b482724a9b7b0bc0dd85c6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6ea2ef7311b482724a9b7b0bc0dd85c6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6ea2ef7311b482724a9b7b0bc0dd85c6-Reviews.html", "metareview": "", "pdf_size": 6005574, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=726465081693483605&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "School of Electronics and Computer Engineering, Peking University; School of Electronics and Computer Engineering, Peking University; School of Electronics and Computer Engineering, Peking University; School of Electronics and Computer Engineering, Peking University; School of Electronics Engineering and Computer Science, Peking University", "aff_domain": "pku.edu.cn;ece.pku.edu.cn;pku.edu.cn;ece.pku.edu.cn;pku.edu.cn", "email": "pku.edu.cn;ece.pku.edu.cn;pku.edu.cn;ece.pku.edu.cn;pku.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6ea2ef7311b482724a9b7b0bc0dd85c6-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "School of Electronics and Computer Engineering", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "PKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Deep Exploration via Bootstrapped DQN", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7014", "id": "7014", "author_site": "Ian Osband, Charles Blundell, Alexander Pritzel, Benjamin Van Roy", "author": "Ian Osband; Charles Blundell; Alexander Pritzel; Benjamin Van Roy", "abstract": "Efficient exploration remains a major challenge for reinforcement learning (RL). Common dithering strategies for exploration, such as epsilon-greedy, do not carry out temporally-extended (or deep) exploration; this can lead to exponentially larger data requirements. However, most algorithms for statistically efficient RL are not computationally tractable in complex environments. Randomized value functions offer a promising approach to efficient exploration with generalization, but existing algorithms are not compatible with nonlinearly parameterized value functions. As a first step towards addressing such contexts we develop bootstrapped DQN. We demonstrate that bootstrapped DQN can combine deep exploration with deep neural networks for exponentially faster learning than any dithering strategy. In the Arcade Learning Environment bootstrapped DQN substantially improves learning speed and cumulative performance across most games.", "bibtex": "@inproceedings{NIPS2016_8d8818c8,\n author = {Osband, Ian and Blundell, Charles and Pritzel, Alexander and Van Roy, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Exploration via Bootstrapped DQN},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8d8818c8e140c64c743113f563cf750f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8d8818c8e140c64c743113f563cf750f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8d8818c8e140c64c743113f563cf750f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8d8818c8e140c64c743113f563cf750f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8d8818c8e140c64c743113f563cf750f-Reviews.html", "metareview": "", "pdf_size": 5397015, "gs_citation": 1687, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1614250880059729675&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Stanford University+Google DeepMind; Google DeepMind; Google DeepMind; Stanford University", "aff_domain": "google.com;google.com;google.com;stanford.edu", "email": "google.com;google.com;google.com;stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8d8818c8e140c64c743113f563cf750f-Abstract.html", "aff_unique_index": "0+1;1;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.stanford.edu;https://deepmind.com", "aff_unique_abbr": "Stanford;DeepMind", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0+1;1;1;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Deep Learning Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7082", "id": "7082", "author_site": "Dale Schuurmans, Martin A Zinkevich", "author": "Dale Schuurmans; Martin A Zinkevich", "abstract": "We investigate a reduction of supervised learning to game playing that reveals new connections and learning methods. For convex one-layer problems, we demonstrate an equivalence between global minimizers of the training problem and Nash equilibria in a simple game. We then show how the game can be extended to general acyclic neural networks with differentiable convex gates, establishing a bijection between the Nash equilibria and critical (or KKT) points of the deep learning problem. Based on these connections we investigate alternative learning methods, and find that regret matching can achieve competitive training performance while producing sparser models than current deep learning approaches.", "bibtex": "@inproceedings{NIPS2016_c4015b7f,\n author = {Schuurmans, Dale and Zinkevich, Martin A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Learning Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c4015b7f368e6b4871809f49debe0579-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c4015b7f368e6b4871809f49debe0579-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c4015b7f368e6b4871809f49debe0579-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c4015b7f368e6b4871809f49debe0579-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c4015b7f368e6b4871809f49debe0579-Reviews.html", "metareview": "", "pdf_size": 489172, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15077798161821747783&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Google + University of Alberta; Google", "aff_domain": "ualberta.ca;google.com", "email": "ualberta.ca;google.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c4015b7f368e6b4871809f49debe0579-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "Google;University of Alberta", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ualberta.ca", "aff_unique_abbr": "Google;UAlberta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0+1;0", "aff_country_unique": "United States;Canada" }, { "title": "Deep Learning Models of the Retinal Response to Natural Scenes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7010", "id": "7010", "author_site": "Lane McIntosh, Niru Maheswaranathan, Aran Nayebi, Surya Ganguli, Stephen Baccus", "author": "Lane McIntosh; Niru Maheswaranathan; Aran Nayebi; Surya Ganguli; Stephen Baccus", "abstract": "A central challenge in sensory neuroscience is to understand neural computations and circuit mechanisms that underlie the encoding of ethologically relevant, natural stimuli. In multilayered neural circuits, nonlinear processes such as synaptic transmission and spiking dynamics present a significant obstacle to the creation of accurate computational models of responses to natural stimuli. Here we demonstrate that deep convolutional neural networks (CNNs) capture retinal responses to natural scenes nearly to within the variability of a cell's response, and are markedly more accurate than linear-nonlinear (LN) models and Generalized Linear Models (GLMs). Moreover, we find two additional surprising properties of CNNs: they are less susceptible to overfitting than their LN counterparts when trained on small amounts of data, and generalize better when tested on stimuli drawn from a different distribution (e.g. between natural scenes and white noise). An examination of the learned CNNs reveals several properties. First, a richer set of feature maps is necessary for predicting the responses to natural scenes compared to white noise. Second, temporally precise responses to slowly varying inputs originate from feedforward inhibition, similar to known retinal mechanisms. Third, the injection of latent noise sources in intermediate layers enables our model to capture the sub-Poisson spiking variability observed in retinal ganglion cells. 
Fourth, augmenting our CNNs with recurrent lateral connections enables them to capture contrast adaptation as an emergent property of accurately describing retinal responses to natural scenes. These methods can be readily generalized to other sensory modalities and stimulus ensembles. Overall, this work demonstrates that CNNs not only accurately capture sensory circuit responses to natural scenes, but also can yield information about the circuit's internal structure and function.", "bibtex": "@inproceedings{NIPS2016_a1d33d0d,\n author = {McIntosh, Lane and Maheswaranathan, Niru and Nayebi, Aran and Ganguli, Surya and Baccus, Stephen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Learning Models of the Retinal Response to Natural Scenes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a1d33d0dfec820b41b54430b50e96b5c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a1d33d0dfec820b41b54430b50e96b5c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a1d33d0dfec820b41b54430b50e96b5c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a1d33d0dfec820b41b54430b50e96b5c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a1d33d0dfec820b41b54430b50e96b5c-Reviews.html", "metareview": "", "pdf_size": 2867560, "gs_citation": 318, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14725773497076530478&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Neurosciences PhD Program; Neurosciences PhD Program; Neurosciences PhD Program; Department of Applied Physics+Neurobiology Department; Neurobiology Department", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a1d33d0dfec820b41b54430b50e96b5c-Abstract.html", "aff_unique_index": "0;0;0;1+2;2", "aff_unique_norm": "Neurosciences PhD Program;Institution Name Not Provided;Neurobiology Department", "aff_unique_dep": "Neurosciences;Department of Applied Physics;Neurobiology Department", "aff_unique_url": ";;", "aff_unique_abbr": ";;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Deep Learning for Predicting Human Strategic Behavior", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7215", "id": "7215", "author_site": "Jason Hartford, James R Wright, Kevin Leyton-Brown", "author": "Jason S Hartford; James R Wright; Kevin Leyton-Brown", "abstract": "Predicting the behavior of human participants in strategic settings is an important problem in many domains. Most existing work either assumes that participants are perfectly rational, or attempts to directly model each participant's cognitive processes based on insights from cognitive psychology and experimental economics. In this work, we present an alternative, a deep learning approach that automatically performs cognitive modeling without relying on such expert knowledge. 
We introduce a novel architecture that allows a single network to generalize across different input and output dimensions by using matrix units rather than scalar units, and show that it significantly outperforms the previous state of the art, which relies on expert-constructed features.", "bibtex": "@inproceedings{NIPS2016_7eb3c8be,\n author = {Hartford, Jason S and Wright, James R and Leyton-Brown, Kevin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Learning for Predicting Human Strategic Behavior},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7eb3c8be3d411e8ebfab08eba5f49632-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7eb3c8be3d411e8ebfab08eba5f49632-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7eb3c8be3d411e8ebfab08eba5f49632-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7eb3c8be3d411e8ebfab08eba5f49632-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7eb3c8be3d411e8ebfab08eba5f49632-Reviews.html", "metareview": "", "pdf_size": 821418, "gs_citation": 123, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4494701713055412019&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Department of Computer Science, University of British Columbia; Department of Computer Science, University of British Columbia; Department of Computer Science, University of British Columbia", "aff_domain": "cs.ubc.ca;cs.ubc.ca;cs.ubc.ca", "email": "cs.ubc.ca;cs.ubc.ca;cs.ubc.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7eb3c8be3d411e8ebfab08eba5f49632-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Vancouver", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Deep Learning without Poor Local Minima", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7409", "id": "7409", "author": "Kenji Kawaguchi", "abstract": "In this paper, we prove a conjecture published in 1989 and also partially address an open problem announced at the Conference on Learning Theory (COLT) 2015. For an expected loss function of a deep nonlinear neural network, we prove the following statements under the independence assumption adopted from recent work: 1) the function is non-convex and non-concave, 2) every local minimum is a global minimum, 3) every critical point that is not a global minimum is a saddle point, and 4) the property of saddle points differs for shallow networks (with three layers) and deeper networks (with more than three layers). Moreover, we prove that the same four statements hold for deep linear neural networks with any depth, any widths and no unrealistic assumptions. As a result, we present an instance for which we can answer the following question: how difficult is it to directly train a deep model in theory? 
It is more difficult than training classical machine learning models (because of the non-convexity), but not too difficult (because of the nonexistence of poor local minima and the property of the saddle points). We note that even though we have advanced the theoretical foundations of deep learning, there is still a gap between theory and practice.", "bibtex": "@inproceedings{NIPS2016_f2fc9902,\n author = {Kawaguchi, Kenji},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Learning without Poor Local Minima},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f2fc990265c712c49d51a18a32b39f0c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f2fc990265c712c49d51a18a32b39f0c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f2fc990265c712c49d51a18a32b39f0c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f2fc990265c712c49d51a18a32b39f0c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f2fc990265c712c49d51a18a32b39f0c-Reviews.html", "metareview": "", "pdf_size": 170371, "gs_citation": 1175, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16088411110075986174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Massachusetts Institute of Technology", "aff_domain": "mit.edu", "email": "mit.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f2fc990265c712c49d51a18a32b39f0c-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Deep Neural Networks with Inexact Matching for Person Re-Identification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7394", "id": "7394", "author_site": "Arulkumar Subramaniam, Moitreya Chatterjee, Anurag Mittal", "author": "Arulkumar Subramaniam; Moitreya Chatterjee; Anurag Mittal", "abstract": "Person Re-Identification is the task of matching images of a person across multiple camera views. Almost all prior approaches address this challenge by attempting to learn the possible transformations that relate the different views of a person from training corpora. Then, they utilize these transformation patterns for matching a query image to those in a gallery image bank at test time. This necessitates learning good feature representations of the images and having a robust feature matching technique. Deep learning approaches, such as Convolutional Neural Networks (CNN), simultaneously do both and have shown great promise recently. In this work, we propose two CNN-based architectures for Person Re-Identification. In the first, given a pair of images, we extract feature maps from these images via multiple stages of convolution and pooling. A novel inexact matching technique then matches pixels in the first representation with those of the second. Furthermore, we search across a wider region in the second representation for matching. Our novel matching technique allows us to tackle the challenges posed by large viewpoint variations, illumination changes or partial occlusions. 
Our approach shows promising performance and requires only about half the parameters of a current state-of-the-art technique. Nonetheless, it also suffers from false matches at times. In order to mitigate this issue, we propose a fused architecture that combines our inexact matching pipeline with a state-of-the-art exact matching technique. We observe substantial gains with the fused model over the current state-of-the-art on multiple challenging datasets of varying sizes, with gains of up to about 21%.", "bibtex": "@inproceedings{NIPS2016_e56b06c5,\n author = {Subramaniam, Arulkumar and Chatterjee, Moitreya and Mittal, Anurag},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Neural Networks with Inexact Matching for Person Re-Identification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e56b06c51e1049195d7b26d043c478a0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e56b06c51e1049195d7b26d043c478a0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e56b06c51e1049195d7b26d043c478a0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e56b06c51e1049195d7b26d043c478a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e56b06c51e1049195d7b26d043c478a0-Reviews.html", "metareview": "", "pdf_size": 519110, "gs_citation": 129, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17431302958796484447&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Indian Institute of Technology Madras; Indian Institute of Technology Madras; Indian Institute of Technology Madras", "aff_domain": "cse.iitm.ac.in;gmail.com;cse.iitm.ac.in", "email": "cse.iitm.ac.in;gmail.com;cse.iitm.ac.in", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e56b06c51e1049195d7b26d043c478a0-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Madras", "aff_unique_dep": "", "aff_unique_url": "https://www.iitm.ac.in", "aff_unique_abbr": "IIT Madras", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madras", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "title": "Deep Submodular Functions: Definitions and Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6961", "id": "6961", "author_site": "Brian W Dolhansky, Jeffrey A Bilmes", "author": "Brian W Dolhansky; Jeff A. Bilmes", "abstract": "We propose and study a new class of submodular functions called deep submodular functions (DSFs). We define DSFs and situate them within the broader context of classes of submodular functions in relationship both to various matroid ranks and sums of concave composed with modular functions (SCMs). Notably, we find that DSFs constitute a strictly broader class than SCMs, thus motivating their use, but that they do not comprise all submodular functions. Interestingly, some DSFs can be seen as special cases of certain deep neural networks (DNNs), hence the name. 
Finally, we provide a method to learn DSFs in a max-margin framework, and offer preliminary results applying this both to synthetic and real-world data instances.", "bibtex": "@inproceedings{NIPS2016_7fea637f,\n author = {Dolhansky, Brian W and Bilmes, Jeff A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Deep Submodular Functions: Definitions and Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7fea637fd6d02b8f0adf6f7dc36aed93-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7fea637fd6d02b8f0adf6f7dc36aed93-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7fea637fd6d02b8f0adf6f7dc36aed93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7fea637fd6d02b8f0adf6f7dc36aed93-Reviews.html", "metareview": "", "pdf_size": 1235473, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8803185049431209051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Dept. of Computer Science and Engineering\u2021; Dept. of Electrical Engineering\u2020", "aff_domain": "cs.washington.edu;uw.edu", "email": "cs.washington.edu;uw.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7fea637fd6d02b8f0adf6f7dc36aed93-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Washington;University of California, Berkeley", "aff_unique_dep": "Department of Computer Science and Engineering;Department of Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.cs.washington.edu;https://www.eecs.berkeley.edu", "aff_unique_abbr": "UW CSE;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "DeepMath - Deep Sequence Models for Premise Selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7049", "id": "7049", "author_site": "Geoffrey Irving, Christian Szegedy, Alexander Alemi, Niklas Een, Francois Chollet, Josef Urban", "author": "Geoffrey Irving; Christian Szegedy; Alexander A Alemi; Niklas Een; Francois Chollet; Josef Urban", "abstract": "We study the effectiveness of neural sequence models for premise selection in automated theorem proving, a key bottleneck for progress in formalized mathematics. We propose a two-stage approach that yields good results on the premise selection task on the Mizar corpus while avoiding the hand-engineered features of existing state-of-the-art models. To our knowledge, this is the first time deep learning has been applied to theorem proving on a large scale.", "bibtex": "@inproceedings{NIPS2016_f197002b,\n author = {Irving, Geoffrey and Szegedy, Christian and Alemi, Alexander A and Een, Niklas and Chollet, Francois and Urban, Josef},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {DeepMath - Deep Sequence Models for Premise Selection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f197002b9a0853eca5e046d9ca4663d5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f197002b9a0853eca5e046d9ca4663d5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f197002b9a0853eca5e046d9ca4663d5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f197002b9a0853eca5e046d9ca4663d5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f197002b9a0853eca5e046d9ca4663d5-Reviews.html", "metareview": "", "pdf_size": 1222157, "gs_citation": 184, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11261324651156316340&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 2, "aff": "Google Inc.; Google Inc.; Google Inc.; Google Inc.; Google Inc.; Czech Technical University in Prague", "aff_domain": "google.com;google.com;google.com;google.com;google.com;gmail.com", "email": "google.com;google.com;google.com;google.com;google.com;gmail.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f197002b9a0853eca5e046d9ca4663d5-Abstract.html", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Google;Czech Technical University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ctu.cz", "aff_unique_abbr": "Google;CTU", "aff_campus_unique_index": "0;0;0;0;0;1", "aff_campus_unique": "Mountain View;Prague", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Czech Republic" }, { "title": "Dense Associative Memory for Pattern Recognition", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7426", "id": "7426", "author_site": "Dmitry Krotov, John J. Hopfield", "author": "Dmitry Krotov; John J. Hopfield", "abstract": "A model of associative memory is studied, which stores and reliably retrieves many more patterns than the number of neurons in the network. We propose a simple duality between this dense associative memory and neural networks commonly used in deep learning. On the associative memory side of this duality, a family of models that smoothly interpolates between two limiting cases can be constructed. One limit is referred to as the feature-matching mode of pattern recognition, and the other one as the prototype regime. On the deep learning side of the duality, this family corresponds to feedforward neural networks with one hidden layer and various activation functions, which transmit the activities of the visible neurons to the hidden layer. This family of activation functions includes logistic functions, rectified linear units, and rectified polynomials of higher degrees. The proposed duality makes it possible to apply energy-based intuition from associative memory to analyze computational properties of neural networks with unusual activation functions - the higher rectified polynomials, which until now have not been used in deep learning. The utility of the dense memories is illustrated for two test cases: the logical gate XOR and the recognition of handwritten digits from the MNIST data set.", "bibtex": "@inproceedings{NIPS2016_eaae339c,\n author = {Krotov, Dmitry and Hopfield, John J.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dense Associative Memory for Pattern Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/eaae339c4d89fc102edd9dbdb6a28915-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/eaae339c4d89fc102edd9dbdb6a28915-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/eaae339c4d89fc102edd9dbdb6a28915-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/eaae339c4d89fc102edd9dbdb6a28915-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/eaae339c4d89fc102edd9dbdb6a28915-Reviews.html", "metareview": "", "pdf_size": 2741705, "gs_citation": 477, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8396780884289418593&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Simons Center for Systems Biology, Institute for Advanced Study, Princeton, USA; Princeton Neuroscience Institute, Princeton University, Princeton, USA", "aff_domain": "ias.edu;princeton.edu", "email": "ias.edu;princeton.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/eaae339c4d89fc102edd9dbdb6a28915-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Institute for Advanced Study;Princeton University", "aff_unique_dep": "Simons Center for Systems Biology;Princeton Neuroscience Institute", "aff_unique_url": "https://www.ias.edu;https://www.princeton.edu", "aff_unique_abbr": "IAS;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Princeton", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Density Estimation via Discrepancy Based Adaptive Sequential Partition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7090", "id": "7090", "author_site": "Dangna Li, Kun Yang, Wing Hung Wong", "author": "Dangna Li; Kun Yang; Wing Hung Wong", "abstract": "Given $iid$ observations from an unknown continuous distribution defined on some domain $\Omega$, we propose a nonparametric method to learn a piecewise constant function to approximate the underlying probability density function. Our density estimate is a piecewise constant function defined on a binary partition of $\Omega$. The key ingredient of the algorithm is to use discrepancy, a concept originating from Quasi Monte Carlo analysis, to control the partition process. The resulting algorithm is simple, efficient, and has a provable convergence rate. We empirically demonstrate its efficiency as a density estimation method. We also show how it can be utilized to find good initializations for k-means.", "bibtex": "@inproceedings{NIPS2016_185c29dc,\n author = {Li, Dangna and Yang, Kun and Wong, Wing Hung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Density Estimation via Discrepancy Based Adaptive Sequential Partition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/185c29dc24325934ee377cfda20e414c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/185c29dc24325934ee377cfda20e414c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/185c29dc24325934ee377cfda20e414c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/185c29dc24325934ee377cfda20e414c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/185c29dc24325934ee377cfda20e414c-Reviews.html", "metareview": "", "pdf_size": 3728570, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4812312375644330849&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "ICME, Stanford University; Google; Department of Statistics, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/185c29dc24325934ee377cfda20e414c-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": "Institute for Computational and Mathematical Engineering;Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Depth from a Single Image by Harmonizing Overcomplete Local Network Predictions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7145", "id": "7145", "author_site": "Ayan Chakrabarti, Jingyu Shao, Greg Shakhnarovich", "author": "Ayan Chakrabarti; Jingyu Shao; Greg Shakhnarovich", "abstract": "A single color image can contain many cues informative towards different aspects of local geometric structure. We approach the problem of monocular depth estimation by using a neural network to produce a mid-level representation that summarizes these cues. This network is trained to characterize local scene geometry by predicting, at every image location, depth derivatives of different orders, orientations and scales. However, instead of a single estimate for each derivative, the network outputs probability distributions that allow it to express confidence about some coefficients, and ambiguity about others. Scene depth is then estimated by harmonizing this overcomplete set of network predictions, using a globalization procedure that finds a single consistent depth map that best matches all the local derivative distributions. We demonstrate the efficacy of this approach through evaluation on the NYU v2 depth data set.", "bibtex": "@inproceedings{NIPS2016_f3bd5ad5,\n author = {Chakrabarti, Ayan and Shao, Jingyu and Shakhnarovich, Greg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Depth from a Single Image by Harmonizing Overcomplete Local Network Predictions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f3bd5ad57c8389a8a1a541a76be463bf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f3bd5ad57c8389a8a1a541a76be463bf-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f3bd5ad57c8389a8a1a541a76be463bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f3bd5ad57c8389a8a1a541a76be463bf-Reviews.html", "metareview": "", "pdf_size": 2598156, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15999446866666118590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "TTI-Chicago, Chicago, IL; Dept. of Statistics, UCLA\u2217, Los Angeles, CA + TTI-Chicago, Chicago, IL; TTI-Chicago, Chicago, IL", "aff_domain": "ttic.edu;ucla.edu;ttic.edu", "email": "ttic.edu;ucla.edu;ttic.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f3bd5ad57c8389a8a1a541a76be463bf-Abstract.html", "aff_unique_index": "0;1+0;0", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of California, Los Angeles", "aff_unique_dep": ";Department of Statistics", "aff_unique_url": "https://www.tti-chicago.org;https://www.ucla.edu", "aff_unique_abbr": "TTI-Chicago;UCLA", "aff_campus_unique_index": "0;1+0;0", "aff_campus_unique": "Chicago;Los Angeles", "aff_country_unique_index": "0;0+0;0", "aff_country_unique": "United States" }, { "title": "Designing smoothing functions for improved worst-case competitive ratio in online optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7255", "id": "7255", "author_site": "Reza Eghbali, Maryam Fazel", "author": "Reza Eghbali; Maryam Fazel", "abstract": "Online optimization covers problems such as online resource allocation, online bipartite matching, adwords (a central problem in e-commerce and advertising), and adwords with separable concave returns. We analyze the worst-case competitive ratio of two primal-dual algorithms for a class of online convex (conic) optimization problems that contains the previous examples as special cases defined on the positive orthant. We derive a sufficient condition on the objective function that guarantees a constant worst-case competitive ratio (greater than or equal to $\frac{1}{2}$) for monotone objective functions. We provide new examples of online problems on the positive orthant and the positive semidefinite cone that satisfy the sufficient condition. We show how smoothing can improve the competitive ratio of these algorithms, and in particular for separable functions, we show that the optimal smoothing can be derived by solving a convex optimization problem. This result allows us to directly optimize the competitive ratio bound over a class of smoothing functions, and hence design effective smoothing customized for a given cost function.", "bibtex": "@inproceedings{NIPS2016_3c1e4bd6,\n author = {Eghbali, Reza and Fazel, Maryam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Designing smoothing functions for improved worst-case competitive ratio in online optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3c1e4bd67169b8153e0047536c9f541e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3c1e4bd67169b8153e0047536c9f541e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3c1e4bd67169b8153e0047536c9f541e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3c1e4bd67169b8153e0047536c9f541e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3c1e4bd67169b8153e0047536c9f541e-Reviews.html", "metareview": "", "pdf_size": 380319, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15462639617732656657&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "Department of Electrical Engineering, University of Washington; Department of Electrical Engineering, University of Washington", "aff_domain": "uw.edu;uw.edu", "email": "uw.edu;uw.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3c1e4bd67169b8153e0047536c9f541e-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dialog-based Language Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7317", "id": "7317", "author": "Jason E Weston", "abstract": "A long-term goal of machine learning research is to build an intelligent dialog agent. Most research in natural language understanding has focused on learning from fixed training sets of labeled data, with supervision either at the word level (tagging, parsing tasks) or sentence level (question answering, machine translation). This kind of supervision does not reflect how humans learn, where language is both learned by, and used for, communication. In this work, we study dialog-based language learning, where supervision is given naturally and implicitly in the response of the dialog partner during the conversation. We study this setup in two domains: the bAbI dataset of (Weston et al., 2015) and large-scale question answering from (Dodge et al., 2015). We evaluate a set of baseline learning strategies on these tasks, and show that a novel model incorporating predictive lookahead is a promising approach for learning from a teacher's response. In particular, a surprising result is that it can learn to answer questions correctly without any reward-based supervision at all.", "bibtex": "@inproceedings{NIPS2016_07563a3f,\n author = {Weston, Jason E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dialog-based Language Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/07563a3fe3bbe7e3ba84431ad9d055af-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/07563a3fe3bbe7e3ba84431ad9d055af-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/07563a3fe3bbe7e3ba84431ad9d055af-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/07563a3fe3bbe7e3ba84431ad9d055af-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/07563a3fe3bbe7e3ba84431ad9d055af-Reviews.html", "metareview": "", "pdf_size": 344404, "gs_citation": 158, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8807624213652874411&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Facebook AI Research, New York.", "aff_domain": "fb.com", "email": "fb.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/07563a3fe3bbe7e3ba84431ad9d055af-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook AI Research", "aff_unique_url": "https://research.facebook.com", "aff_unique_abbr": "FAIR", "aff_campus_unique_index": "0", "aff_campus_unique": "New York", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Differential Privacy without Sensitivity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7233", "id": "7233", "author_site": "Kentaro Minami, Hiromi Arai, Issei Sato, Hiroshi Nakagawa", "author": "Kentaro Minami; Hiromi Arai; Issei Sato; Hiroshi Nakagawa", "abstract": "The exponential mechanism is a general method to construct a randomized estimator that satisfies $(\varepsilon, 0)$-differential privacy. Recently, Wang et al. showed that the Gibbs posterior, which is a data-dependent probability distribution that contains the Bayesian posterior, is essentially equivalent to the exponential mechanism under certain boundedness conditions on the loss function. While the exponential mechanism provides a way to build an $(\varepsilon, 0)$-differentially private algorithm, it requires boundedness of the loss function, which is quite stringent for some learning problems. In this paper, we focus on $(\varepsilon, \delta)$-differential privacy of Gibbs posteriors with convex and Lipschitz loss functions. Our result extends the classical exponential mechanism, allowing the loss functions to have unbounded sensitivity.", "bibtex": "@inproceedings{NIPS2016_a7aeed74,\n author = {Minami, Kentaro and Arai, Hiromi and Sato, Issei and Nakagawa, Hiroshi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Differential Privacy without Sensitivity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a7aeed74714116f3b292a982238f83d2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a7aeed74714116f3b292a982238f83d2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a7aeed74714116f3b292a982238f83d2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a7aeed74714116f3b292a982238f83d2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a7aeed74714116f3b292a982238f83d2-Reviews.html", "metareview": "", "pdf_size": 427934, "gs_citation": 82, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7052960427008149577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "The University of Tokyo; The University of Tokyo; The University of Tokyo; The University of Tokyo", "aff_domain": "mist.i.u-tokyo.ac.jp;dl.itc.u-tokyo.ac.jp;k.u-tokyo.ac.jp;dl.itc.u-tokyo.ac.jp", "email": "mist.i.u-tokyo.ac.jp;dl.itc.u-tokyo.ac.jp;k.u-tokyo.ac.jp;dl.itc.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a7aeed74714116f3b292a982238f83d2-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Diffusion-Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7077", "id": "7077", "author_site": "James Atwood, Don Towsley", "author": "James Atwood; Don Towsley", "abstract": "We present diffusion-convolutional neural networks (DCNNs), a new model for graph-structured data. Through the introduction of a diffusion-convolution operation, we show how diffusion-based representations can be learned from graph-structured data and used as an effective basis for node classification. DCNNs have several attractive qualities, including a latent representation for graphical data that is invariant under isomorphism, as well as polynomial-time prediction and learning that can be represented as tensor operations and efficiently implemented on a GPU. Through several experiments with real structured datasets, we demonstrate that DCNNs are able to outperform probabilistic relational models and kernel-on-graph methods at relational node classification tasks.", "bibtex": "@inproceedings{NIPS2016_390e9825,\n author = {Atwood, James and Towsley, Don},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Diffusion-Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/390e982518a50e280d8e2b535462ec1f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/390e982518a50e280d8e2b535462ec1f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/390e982518a50e280d8e2b535462ec1f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/390e982518a50e280d8e2b535462ec1f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/390e982518a50e280d8e2b535462ec1f-Reviews.html", "metareview": "", "pdf_size": 509831, "gs_citation": 1767, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17241458867032154450&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "College of Information and Computer Science, University of Massachusetts; College of Information and Computer Science, University of Massachusetts", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/390e982518a50e280d8e2b535462ec1f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts", "aff_unique_dep": "College of Information and Computer Science", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Dimension-Free Iteration Complexity of Finite Sum Optimization Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7333", "id": "7333", "author_site": "Yossi Arjevani, Ohad Shamir", "author": "Yossi Arjevani; Ohad Shamir", "abstract": "Many canonical machine learning problems boil down to a convex optimization problem with a finite sum structure. However, whereas much progress has been made in developing faster algorithms for this setting, the inherent limitations of these problems are not satisfactorily addressed by existing lower bounds. Indeed, current bounds focus on first-order optimization algorithms, and only apply in the often unrealistic regime where the number of iterations is less than $O(d/n)$ (where $d$ is the dimension and $n$ is the number of samples). In this work, we extend the framework of Arjevani et al. \cite{arjevani2015lower,arjevani2016iteration} to provide new lower bounds, which are dimension-free, and go beyond the assumptions of current bounds, thereby covering standard finite sum optimization methods, e.g., SAG, SAGA, SVRG, SDCA without duality, as well as stochastic coordinate-descent methods, such as SDCA and accelerated proximal SDCA.", "bibtex": "@inproceedings{NIPS2016_29957047,\n author = {Arjevani, Yossi and Shamir, Ohad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dimension-Free Iteration Complexity of Finite Sum Optimization Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/299570476c6f0309545110c592b6a63b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/299570476c6f0309545110c592b6a63b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/299570476c6f0309545110c592b6a63b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/299570476c6f0309545110c592b6a63b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/299570476c6f0309545110c592b6a63b-Reviews.html", "metareview": "", "pdf_size": 529186, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17652521997258481675&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Weizmann Institute of Science; Weizmann Institute of Science", "aff_domain": "weizmann.ac.il;weizmann.ac.il", "email": "weizmann.ac.il;weizmann.ac.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/299570476c6f0309545110c592b6a63b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.weizmann.org.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Dimensionality Reduction of Massive Sparse Datasets Using Coresets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7122", "id": "7122", "author_site": "Dan Feldman, Mikhail Volkov, Daniela Rus", "author": "Dan Feldman; Mikhail Volkov; Daniela Rus", "abstract": "In this paper we present a practical solution with performance guarantees to the problem of dimensionality reduction for very large scale sparse matrices. We show applications of our approach to computing the Principal Component Analysis (PCA) of any $n\times d$ matrix, using one pass over the stream of its rows. Our solution uses coresets: a scaled subset of the $n$ rows that approximates their sum of squared distances to \emph{every} $k$-dimensional \emph{affine} subspace. An open theoretical problem has been to compute such a coreset that is independent of both $n$ and $d$. An open practical problem has been to compute a non-trivial approximation to the PCA of very large but sparse databases such as the Wikipedia document-term matrix in a reasonable time. We answer both of these questions affirmatively. Our main technical result is a new framework for deterministic coreset constructions based on a reduction to the problem of counting items in a stream.", "bibtex": "@inproceedings{NIPS2016_b7087c1f,\n author = {Feldman, Dan and Volkov, Mikhail and Rus, Daniela},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dimensionality Reduction of Massive Sparse Datasets Using Coresets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b7087c1f4f89e63af8d46f3b20271153-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b7087c1f4f89e63af8d46f3b20271153-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b7087c1f4f89e63af8d46f3b20271153-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b7087c1f4f89e63af8d46f3b20271153-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b7087c1f4f89e63af8d46f3b20271153-Reviews.html", "metareview": "", "pdf_size": 498753, "gs_citation": 72, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17632034672469372835&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "University of Haifa; CSAIL, MIT; CSAIL, MIT", "aff_domain": "gmail.com;csail.mit.edu;csail.mit.edu", "email": "gmail.com;csail.mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b7087c1f4f89e63af8d46f3b20271153-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Haifa;Massachusetts Institute of Technology", "aff_unique_dep": ";Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.haifa.ac.il;https://www.csail.mit.edu", "aff_unique_abbr": "UoH;MIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "title": "Direct Feedback Alignment Provides Learning in Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7100", "id": "7100", "author": "Arild N\u00f8kland", "abstract": "Artificial neural networks are most commonly trained with the back-propagation algorithm, where the gradient for learning is provided by back-propagating the error, layer by layer, from the output layer to the hidden layers. A recently discovered method called feedback-alignment shows that the weights used for propagating the error backward don't have to be symmetric with the weights used for propagating the activation forward. In fact, random feedback weights work equally well, because the network learns how to make the feedback useful. In this work, the feedback alignment principle is used for training hidden layers more independently from the rest of the network, and from a zero initial condition. The error is propagated through fixed random feedback connections directly from the output layer to each hidden layer. This simple method is able to achieve zero training error even in convolutional networks and very deep networks, completely without error back-propagation. The method is a step towards biologically plausible machine learning because the error signal is almost local, and no symmetric or reciprocal weights are required. Experiments show that the test performance on MNIST and CIFAR is almost as good as that obtained with back-propagation for fully connected networks. If combined with dropout, the method achieves 1.45% error on the permutation invariant MNIST task.", "bibtex": "@inproceedings{NIPS2016_d490d7b4,\n author = {N\\o kland, Arild},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Direct Feedback Alignment Provides Learning in Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d490d7b4576290fa60eb31b5fc917ad1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d490d7b4576290fa60eb31b5fc917ad1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d490d7b4576290fa60eb31b5fc917ad1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d490d7b4576290fa60eb31b5fc917ad1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d490d7b4576290fa60eb31b5fc917ad1-Reviews.html", "metareview": "", "pdf_size": 1580089, "gs_citation": 553, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17873300510353620375&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Trondheim, Norway", "aff_domain": "gmail.com", "email": "gmail.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d490d7b4576290fa60eb31b5fc917ad1-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Norwegian University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ntnu.no", "aff_unique_abbr": "NTNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Trondheim", "aff_country_unique_index": "0", "aff_country_unique": "Norway" }, { "title": "Discriminative Gaifman Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7376", "id": "7376", "author": "Mathias Niepert", "abstract": "We present discriminative Gaifman models, a novel family of relational machine learning models. Gaifman models learn feature representations bottom up from representations of locally connected and bounded-size regions of knowledge bases (KBs). Considering local and bounded-size neighborhoods of knowledge bases renders logical inference and learning tractable, mitigates the problem of overfitting, and facilitates weight sharing. Gaifman models sample neighborhoods of knowledge bases so as to make the learned relational models more robust to missing objects and relations, which is a common situation in open-world KBs. We present the core ideas of Gaifman models and apply them to large-scale relational learning problems. We also discuss the ways in which Gaifman models relate to some existing relational machine learning approaches.", "bibtex": "@inproceedings{NIPS2016_7c4ede33,\n author = {Niepert, Mathias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Discriminative Gaifman Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7c4ede33a62160a19586f6e26eaefacf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7c4ede33a62160a19586f6e26eaefacf-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7c4ede33a62160a19586f6e26eaefacf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7c4ede33a62160a19586f6e26eaefacf-Reviews.html", "metareview": "", "pdf_size": 2107836, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6927923735546934800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7c4ede33a62160a19586f6e26eaefacf-Abstract.html" }, { "title": "Disease Trajectory Maps", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7327", "id": "7327", "author_site": "Peter Schulam, Raman Arora", "author": "Peter Schulam; Raman Arora", "abstract": "Medical researchers are coming to appreciate that many diseases are in fact complex, heterogeneous syndromes composed of subpopulations that express different variants of a related complication. Longitudinal data extracted from individual electronic health records (EHR) offer an exciting new way to study subtle differences in the way these diseases progress over time. In this paper, we focus on answering two questions that can be asked using these databases of longitudinal EHR data. First, we want to understand whether there are individuals with similar disease trajectories and whether there are a small number of degrees of freedom that account for differences in trajectories across the population. Second, we want to understand how important clinical outcomes are associated with disease trajectories. To answer these questions, we propose the Disease Trajectory Map (DTM), a novel probabilistic model that learns low-dimensional representations of sparse and irregularly sampled longitudinal data. We propose a stochastic variational inference algorithm for learning the DTM that allows the model to scale to large modern medical datasets. To demonstrate the DTM, we analyze data collected on patients with the complex autoimmune disease, scleroderma. We find that DTM learns meaningful representations of disease trajectories and that the representations are significantly associated with important clinical outcomes.", "bibtex": "@inproceedings{NIPS2016_3baa271b,\n author = {Schulam, Peter and Arora, Raman},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Disease Trajectory Maps},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3baa271bc35fe054c86928f7016e8ae6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3baa271bc35fe054c86928f7016e8ae6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3baa271bc35fe054c86928f7016e8ae6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3baa271bc35fe054c86928f7016e8ae6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3baa271bc35fe054c86928f7016e8ae6-Reviews.html", "metareview": "", "pdf_size": 2193525, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7892893974413462670&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Dept. of Computer Science, Johns Hopkins University; Dept. of Computer Science, Johns Hopkins University", "aff_domain": "cs.jhu.edu;cs.jhu.edu", "email": "cs.jhu.edu;cs.jhu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3baa271bc35fe054c86928f7016e8ae6-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "Dept. of Computer Science", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Disentangling factors of variation in deep representation using adversarial training", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6995", "id": "6995", "author_site": "Michael Mathieu, Junbo Jake Zhao, Junbo (Jake) Zhao, Aditya Ramesh, Pablo Sprechmann, Yann LeCun", "author": "Michael F Mathieu; Junbo Jake Zhao; Junbo Zhao; Aditya Ramesh; Pablo Sprechmann; Yann LeCun", "abstract": "We propose a deep generative model for learning to distill the hidden factors of variation within a set of labeled observations into two complementary codes. One code describes the factors of variation relevant to solving a specified task. The other code describes the remaining factors of variation that are irrelevant to solving this task. The only available source of supervision during the training process comes from our ability to distinguish among different observations belonging to the same category. Concrete examples include multiple images of the same object from different viewpoints, or multiple speech samples from the same speaker. In both of these instances, the factors of variation irrelevant to classification are implicitly expressed by intra-class variabilities, such as the relative position of an object in an image, or the linguistic content of an utterance. Most existing approaches for solving this problem rely heavily on having access to pairs of observations only sharing a single factor of variation, e.g. different objects observed in the exact same conditions. This assumption often does not hold in realistic settings where data acquisition is not controlled and labels for the uninformative components are not available. In this work, we propose to overcome this limitation by augmenting deep convolutional autoencoders with a form of adversarial training. Both factors of variation are implicitly captured in the organization of the learned embedding space, and can be used for solving single-image analogies. 
Experimental results on synthetic and real datasets show that the proposed method is capable of disentangling the influences of style and content factors using a flexible representation, as well as generalizing to unseen styles or content classes.", "bibtex": "@inproceedings{NIPS2016_ef0917ea,\n author = {Mathieu, Michael F and Zhao, Junbo Jake and Zhao, Junbo and Ramesh, Aditya and Sprechmann, Pablo and LeCun, Yann},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Disentangling factors of variation in deep representation using adversarial training},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ef0917ea498b1665ad6c701057155abe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ef0917ea498b1665ad6c701057155abe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ef0917ea498b1665ad6c701057155abe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ef0917ea498b1665ad6c701057155abe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ef0917ea498b1665ad6c701057155abe-Reviews.html", "metareview": "", "pdf_size": 784361, "gs_citation": 574, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17487472987754617699&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ef0917ea498b1665ad6c701057155abe-Abstract.html" }, { "title": "Distributed Flexible Nonlinear Tensor Factorization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7000", "id": "7000", "author_site": "Shandian Zhe, Kai Zhang, Pengyuan Wang, Kuang-chih Lee, Zenglin Xu, Yuan Qi, Zoubin Ghahramani", "author": "Shandian Zhe; Kai Zhang; Pengyuan Wang; Kuang-chih Lee; Zenglin Xu; Yuan Qi; Zoubin Ghahramani", "abstract": "Tensor factorization is a powerful tool to analyse multi-way data. Recently proposed nonlinear factorization methods, although capable of capturing complex relationships, are computationally quite expensive and may suffer from a severe learning bias in the case of extreme data sparsity. Therefore, we propose a distributed, flexible nonlinear tensor factorization model, which avoids the expensive computations and structural restrictions of the Kronecker-product in the existing TGP formulations, allowing an arbitrary subset of tensor entries to be selected for training. Meanwhile, we derive a tractable and tight variational evidence lower bound (ELBO) that enables highly decoupled, parallel computations and high-quality inference. Based on the new bound, we develop a distributed, key-value-free inference algorithm in the MapReduce framework, which can fully exploit the memory cache mechanism in fast MapReduce systems such as Spark. Experiments demonstrate the advantages of our method over several state-of-the-art approaches, in terms of both predictive performance and computational efficiency.", "bibtex": "@inproceedings{NIPS2016_99c5e07b,\n author = {Zhe, Shandian and Zhang, Kai and Wang, Pengyuan and Lee, Kuang-chih and Xu, Zenglin and Qi, Yuan and Ghahramani, Zoubin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Distributed Flexible Nonlinear Tensor Factorization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/99c5e07b4d5de9d18c350cdf64c5aa3d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/99c5e07b4d5de9d18c350cdf64c5aa3d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/99c5e07b4d5de9d18c350cdf64c5aa3d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/99c5e07b4d5de9d18c350cdf64c5aa3d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/99c5e07b4d5de9d18c350cdf64c5aa3d-Reviews.html", "metareview": "", "pdf_size": 354730, "gs_citation": 78, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2815517462215887578&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Dept. Computer Science, Purdue University; NEC Laboratories America, Princeton NJ; Dept. Marketing, University of Georgia at Athens; Yahoo! Research; Big Data Res. Center, School Comp. Sci. Eng., Univ. of Electr. Sci. & Tech. of China; Ant Financial Service Group, Alibaba; University of Cambridge", "aff_domain": "purdue.edu;nec-labs.com;uga.edu;yahoo-inc.com;uestc.edu.cn;outlook.com;cam.ac.uk", "email": "purdue.edu;nec-labs.com;uga.edu;yahoo-inc.com;uestc.edu.cn;outlook.com;cam.ac.uk", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/99c5e07b4d5de9d18c350cdf64c5aa3d-Abstract.html", "aff_unique_index": "0;1;2;3;4;5;6", "aff_unique_norm": "Purdue University;NEC Laboratories America;University of Georgia;Yahoo!;University of Electronic Science and Technology of China;Alibaba Group Holding Limited;University of Cambridge", "aff_unique_dep": "Department of Computer Science;;Department of Marketing;Yahoo! Research;School of Computer Science and Engineering;Ant Financial Service Group;", "aff_unique_url": "https://www.purdue.edu;https://www.nec-labs.com;https://www.uga.edu;https://research.yahoo.com;https://www.uestc.edu.cn;https://www.alibaba.com;https://www.cam.ac.uk", "aff_unique_abbr": "Purdue;NEC Labs;UGA;Yahoo!;UESTC;Alibaba;Cambridge", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Princeton;Athens;Cambridge", "aff_country_unique_index": "0;0;0;0;1;1;2", "aff_country_unique": "United States;China;United Kingdom" }, { "title": "Domain Separation Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7175", "id": "7175", "author_site": "Konstantinos Bousmalis, George Trigeorgis, Nathan Silberman, Dilip Krishnan, Dumitru Erhan", "author": "Konstantinos Bousmalis; George Trigeorgis; Nathan Silberman; Dilip Krishnan; Dumitru Erhan", "abstract": "The cost of large scale data collection and annotation often makes the application of machine learning algorithms to new tasks or datasets prohibitively expensive. One approach circumventing this cost is training models on synthetic data where annotations are provided automatically. Despite their appeal, such models often fail to generalize from synthetic to real images, necessitating domain adaptation algorithms to manipulate these models before they can be successfully applied. Existing approaches focus either on mapping representations from one domain to the other, or on learning to extract features that are invariant to the domain from which they were extracted. 
However, by focusing only on creating a mapping or shared representation between the two domains, they ignore the individual characteristics of each domain. We hypothesize that explicitly modeling what is unique to each domain can improve a model's ability to extract domain-invariant features. Inspired by work on private-shared component analysis, we explicitly learn to extract image representations that are partitioned into two subspaces: one component which is private to each domain and one which is shared across domains. Our model is trained to not only perform the task we care about in the source domain, but also to use the partitioned representation to reconstruct the images from both domains. Our novel architecture results in a model that outperforms the state-of-the-art on a range of unsupervised domain adaptation scenarios and additionally produces visualizations of the private and shared representations enabling interpretation of the domain adaptation process.", "bibtex": "@inproceedings{NIPS2016_45fbc6d3,\n author = {Bousmalis, Konstantinos and Trigeorgis, George and Silberman, Nathan and Krishnan, Dilip and Erhan, Dumitru},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Domain Separation Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/45fbc6d3e05ebd93369ce542e8f2322d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/45fbc6d3e05ebd93369ce542e8f2322d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/45fbc6d3e05ebd93369ce542e8f2322d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/45fbc6d3e05ebd93369ce542e8f2322d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/45fbc6d3e05ebd93369ce542e8f2322d-Reviews.html", "metareview": "", "pdf_size": 1098960, "gs_citation": 1882, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6506097335216287854&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Google Brain, Mountain View, CA; Imperial College London, London, UK + Google Brain, Mountain View, CA; Google Research, New York, NY; Google Research, Cambridge, MA; Google Brain, Mountain View, CA", "aff_domain": "google.com;imperial.ac.uk;google.com;google.com;google.com", "email": "google.com;imperial.ac.uk;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/45fbc6d3e05ebd93369ce542e8f2322d-Abstract.html", "aff_unique_index": "0;1+0;0;0;0", "aff_unique_norm": "Google;Imperial College London", "aff_unique_dep": "Google Brain;", "aff_unique_url": "https://brain.google.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Google Brain;ICL", "aff_campus_unique_index": "0;1+0;2;3;0", "aff_campus_unique": "Mountain View;London;New York;Cambridge", "aff_country_unique_index": "0;1+0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Double Thompson Sampling for Dueling Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7336", "id": "7336", "author_site": "Huasen Wu, Xin Liu", "author": "Huasen Wu; Xin Liu", "abstract": "In this paper, we propose a Double Thompson Sampling (D-TS) algorithm for dueling bandit problems. 
As its name suggests, D-TS selects both the first and the second candidates according to Thompson Sampling. Specifically, D-TS maintains a posterior distribution for the preference matrix, and chooses the pair of arms for comparison according to two sets of samples independently drawn from the posterior distribution. This simple algorithm applies to general Copeland dueling bandits, including Condorcet dueling bandits as its special case. For general Copeland dueling bandits, we show that D-TS achieves $O(K^2 \\log T)$ regret. Moreover, using a back substitution argument, we refine the regret to $O(K \\log T + K^2 \\log \\log T)$ in Condorcet dueling bandits and many practical Copeland dueling bandits. In addition, we propose an enhancement of D-TS, referred to as D-TS$^+$, that reduces the regret by carefully breaking ties. Experiments based on both synthetic and real-world data demonstrate that D-TS and D-TS$^+$ significantly improve the overall performance, in terms of regret and robustness.", "bibtex": "@inproceedings{NIPS2016_9de6d14f,\n author = {Wu, Huasen and Liu, Xin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Double Thompson Sampling for Dueling Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9de6d14fff9806d4bcd1ef555be766cd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9de6d14fff9806d4bcd1ef555be766cd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9de6d14fff9806d4bcd1ef555be766cd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9de6d14fff9806d4bcd1ef555be766cd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9de6d14fff9806d4bcd1ef555be766cd-Reviews.html", "metareview": "", "pdf_size": 508991, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=998265296427010113&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of California, Davis; University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu", "email": "ucdavis.edu;ucdavis.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9de6d14fff9806d4bcd1ef555be766cd-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Doubly Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8513", "id": "8513", "author_site": "Shuangfei Zhai, Yu Cheng, Weining Lu, Zhongfei (Mark) Zhang", "author": "Shuangfei Zhai; Yu Cheng; Zhongfei (Mark) Zhang; Weining Lu", "abstract": "Building large models with parameter sharing accounts for most of the success of deep convolutional neural networks (CNNs). In this paper, we propose doubly convolutional neural networks (DCNNs), which significantly improve the performance of CNNs by further exploring this idea. Instead of allocating a set of convolutional filters that are independently learned, a DCNN maintains groups of filters where filters within each group are translated versions of each other. 
Practically, a DCNN can be easily implemented by a two-step convolution procedure, which is supported by most modern deep learning libraries. We perform extensive experiments on three image classification benchmarks: CIFAR-10, CIFAR-100 and ImageNet, and show that DCNNs consistently outperform other competing architectures. We have also verified that replacing a convolutional layer with a doubly convolutional layer at any depth of a CNN can improve its performance. Moreover, various design choices of DCNNs are demonstrated, which shows that a DCNN can serve the dual purpose of building more accurate models and/or reducing the memory footprint without sacrificing accuracy.", "bibtex": "@inproceedings{NIPS2016_b73dfe25,\n author = {Zhai, Shuangfei and Cheng, Yu and Zhang, Zhongfei (Mark) and Lu, Weining},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Doubly Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b73dfe25b4b8714c029b37a6ad3006fa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b73dfe25b4b8714c029b37a6ad3006fa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b73dfe25b4b8714c029b37a6ad3006fa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b73dfe25b4b8714c029b37a6ad3006fa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b73dfe25b4b8714c029b37a6ad3006fa-Reviews.html", "metareview": "", "pdf_size": 1093971, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=585855361238178207&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Binghamton University; IBM T.J. Watson Research Center; Tsinghua University; Binghamton University", "aff_domain": "binghamton.edu;us.ibm.com;mails.tsinghua.edu.cn;cs.binghamton.edu", "email": "binghamton.edu;us.ibm.com;mails.tsinghua.edu.cn;cs.binghamton.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b73dfe25b4b8714c029b37a6ad3006fa-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Binghamton University;IBM;Tsinghua University", "aff_unique_dep": ";Research Center;", "aff_unique_url": "https://www.binghamton.edu;https://www.ibm.com/research/watson;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Binghamton;IBM;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";T.J. Watson", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "title": "Dual Decomposed Learning with Factorwise Oracle for Structural SVM of Large Output Domain", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6949", "id": "6949", "author_site": "Ian En-Hsu Yen, Xiangru Huang, Kai Zhong, Ruohan Zhang, Pradeep Ravikumar, Inderjit Dhillon", "author": "Ian En-Hsu Yen; Xiangru Huang; Kai Zhong; Ruohan Zhang; Pradeep K Ravikumar; Inderjit S Dhillon", "abstract": "Many applications of machine learning involve structured output with a large domain, where learning a structured predictor is prohibitive due to repeated calls to an expensive inference oracle. 
In this work, we show that, by decomposing the training of a Structural Support Vector Machine (SVM) into a series of multiclass SVM problems connected through messages, one can replace the expensive structured oracle with a Factorwise Maximization Oracle (FMO) that allows an efficient implementation with complexity sublinear in the size of the factor domain. A Greedy Direction Method of Multiplier (GDMM) algorithm is proposed to exploit the sparsity of messages, which guarantees $\\epsilon$ sub-optimality after $O(\\log(1/\\epsilon))$ passes of FMO calls. We conduct experiments on chain-structured problems and fully-connected problems of large output domains. The proposed approach is orders-of-magnitude faster than the state-of-the-art training algorithms for Structural SVM.", "bibtex": "@inproceedings{NIPS2016_7e837225,\n author = {Yen, Ian En-Hsu and Huang, Xiangru and Zhong, Kai and Zhang, Ruohan and Ravikumar, Pradeep K and Dhillon, Inderjit S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Decomposed Learning with Factorwise Oracle for Structural SVM of Large Output Domain},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7e83722522e8aeb7512b7075311316b7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7e83722522e8aeb7512b7075311316b7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7e83722522e8aeb7512b7075311316b7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7e83722522e8aeb7512b7075311316b7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7e83722522e8aeb7512b7075311316b7-Reviews.html", "metareview": "", "pdf_size": 573854, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11176227156448025672&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7e83722522e8aeb7512b7075311316b7-Abstract.html" }, { "title": "Dual Learning for Machine Translation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7037", "id": "7037", "author_site": "Di He, Yingce Xia, Tao Qin, Liwei Wang, Nenghai Yu, Tie-Yan Liu, Wei-Ying Ma", "author": "Di He; Yingce Xia; Tao Qin; Liwei Wang; Nenghai Yu; Tie-Yan Liu; Wei-Ying Ma", "abstract": "While neural machine translation (NMT) has made good progress over the past two years, tens of millions of bilingual sentence pairs are needed for its training. However, human labeling is very costly. To tackle this training data bottleneck, we develop a dual-learning mechanism, which can enable an NMT system to automatically learn from unlabeled data through a dual-learning game. This mechanism is inspired by the following observation: any machine translation task has a dual task, e.g., English-to-French translation (primal) versus French-to-English translation (dual); the primal and dual tasks can form a closed loop, and generate informative feedback signals to train the translation models, even without the involvement of a human labeler. 
In the dual-learning mechanism, we use one agent to represent the model for the primal task and the other agent to represent the model for the dual task, and then ask them to teach each other through a reinforcement learning process. Based on the feedback signals generated during this process (e.g., the language-model likelihood of the output of a model, and the reconstruction error of the original sentence after the primal and dual translations), we can iteratively update the two models until convergence (e.g., using policy gradient methods). We call the corresponding approach to neural machine translation \\emph{dual-NMT}. Experiments show that dual-NMT works very well on English$\\leftrightarrow$French translation; in particular, by learning from monolingual data (with 10\\% bilingual data for warm start), it achieves accuracy comparable to NMT trained on the full bilingual data for the French-to-English translation task.", "bibtex": "@inproceedings{NIPS2016_5b69b9cb,\n author = {He, Di and Xia, Yingce and Qin, Tao and Wang, Liwei and Yu, Nenghai and Liu, Tie-Yan and Ma, Wei-Ying},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Learning for Machine Translation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5b69b9cb83065d403869739ae7f0995e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5b69b9cb83065d403869739ae7f0995e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5b69b9cb83065d403869739ae7f0995e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5b69b9cb83065d403869739ae7f0995e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5b69b9cb83065d403869739ae7f0995e-Reviews.html", "metareview": "", "pdf_size": 260397, "gs_citation": 1100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15841765927830550600&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Key Laboratory of Machine Perception (MOE), School of EECS, Peking University; University of Science and Technology of China; Microsoft Research; Key Laboratory of Machine Perception (MOE), School of EECS, Peking University; University of Science and Technology of China; Microsoft Research; Microsoft Research", "aff_domain": "cis.pku.edu.cn;cis.pku.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com", "email": "cis.pku.edu.cn;cis.pku.edu.cn;mail.ustc.edu.cn;ustc.edu.cn;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5b69b9cb83065d403869739ae7f0995e-Abstract.html", "aff_unique_index": "0;1;2;0;1;2;2", "aff_unique_norm": "Peking University;University of Science and Technology of China;Microsoft", "aff_unique_dep": "School of EECS;;Microsoft Research", "aff_unique_url": "http://www.pku.edu.cn;http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "PKU;USTC;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;1;1", "aff_country_unique": "China;United States" }, { "title": "Dual Space Gradient Descent for Online Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7030", "id": "7030", "author_site": "Trung Le, Tu Nguyen, Vu Nguyen, Dinh Phung", 
"author": "Trung Le; Tu Nguyen; Vu Nguyen; Dinh Phung", "abstract": "One crucial goal in kernel online learning is to bound the model size. Common approaches employ budget maintenance procedures to restrict the model sizes using removal, projection, or merging strategies. Although projection and merging, in the literature, are known to be the most effective strategies, they demand extensive computation whilst removal strategy fails to retain information of the removed vectors. An alternative way to address the model size problem is to apply random features to approximate the kernel function. This allows the model to be maintained directly in the random feature space, hence effectively resolve the curse of kernelization. However, this approach still suffers from a serious shortcoming as it needs to use a high dimensional random feature space to achieve a sufficiently accurate kernel approximation. Consequently, it leads to a significant increase in the computational cost. To address all of these aforementioned challenges, we present in this paper the Dual Space Gradient Descent (DualSGD), a novel framework that utilizes random features as an auxiliary space to maintain information from data points removed during budget maintenance. Consequently, our approach permits the budget to be maintained in a simple, direct and elegant way while simultaneously mitigating the impact of the dimensionality issue on learning performance. We further provide convergence analysis and extensively conduct experiments on five real-world datasets to demonstrate the predictive performance and scalability of our proposed method in comparison with the state-of-the-art baselines.", "bibtex": "@inproceedings{NIPS2016_43351f7b,\n author = {Le, Trung and Nguyen, Tu and Nguyen, Vu and Phung, Dinh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dual Space Gradient Descent for Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/43351f7bf9a215be70c2c2caa7555002-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/43351f7bf9a215be70c2c2caa7555002-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/43351f7bf9a215be70c2c2caa7555002-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/43351f7bf9a215be70c2c2caa7555002-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/43351f7bf9a215be70c2c2caa7555002-Reviews.html", "metareview": "", "pdf_size": 469318, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4684003472022378829&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Centre for Pattern Recognition and Data Analytics, Deakin University, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Australia; Centre for Pattern Recognition and Data Analytics, Deakin University, Australia", "aff_domain": "deakin.edu.au;deakin.edu.au;deakin.edu.au;deakin.edu.au", "email": "deakin.edu.au;deakin.edu.au;deakin.edu.au;deakin.edu.au", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/43351f7bf9a215be70c2c2caa7555002-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Deakin University", "aff_unique_dep": "Centre for Pattern Recognition and Data Analytics", "aff_unique_url": "https://www.deakin.edu.au", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Dueling Bandits: Beyond Condorcet Winners to General Tournament Solutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6957", "id": "6957", "author_site": "Siddartha Ramamohan, Arun Rajkumar, Shivani Agarwal, Shivani Agarwal", "author": "Siddartha Y. Ramamohan; Arun Rajkumar; Shivani Agarwal; Shivani Agarwal", "abstract": "Recent work on deriving $O(\\log T)$ anytime regret bounds for stochastic dueling bandit problems has considered mostly Condorcet winners, which do not always exist, and more recently, winners defined by the Copeland set, which do always exist. In this work, we consider a broad notion of winners defined by tournament solutions in social choice theory, which include the Copeland set as a special case but also include several other notions of winners such as the top cycle, uncovered set, and Banks set, and which, like the Copeland set, always exist. We develop a family of UCB-style dueling bandit algorithms for such general tournament solutions, and show $O(\\log T)$ anytime regret bounds for them. Experiments confirm the ability of our algorithms to achieve low regret relative to the target winning set of interest.", "bibtex": "@inproceedings{NIPS2016_fccb3cdc,\n author = {Ramamohan, Siddartha Y. and Rajkumar, Arun and Agarwal, Shivani and Agarwal, Shivani},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dueling Bandits: Beyond Condorcet Winners to General Tournament Solutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fccb3cdc9acc14a6e70a12f74560c026-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fccb3cdc9acc14a6e70a12f74560c026-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fccb3cdc9acc14a6e70a12f74560c026-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fccb3cdc9acc14a6e70a12f74560c026-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fccb3cdc9acc14a6e70a12f74560c026-Reviews.html", "metareview": "", "pdf_size": 1157548, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15006605780057940560&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fccb3cdc9acc14a6e70a12f74560c026-Abstract.html" }, { "title": "Dynamic Filter Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6931", "id": "6931", "author_site": "Xu Jia, Bert De Brabandere, Tinne Tuytelaars, Luc V Gool", "author": "Xu Jia; Bert De Brabandere; Tinne Tuytelaars; Luc V. Gool", "abstract": "In a traditional convolutional layer, the learned filters stay fixed after training. In contrast, we introduce a new framework, the Dynamic Filter Network, where filters are generated dynamically conditioned on an input. We show that this architecture is a powerful one, with increased flexibility thanks to its adaptive nature, yet without an excessive increase in the number of model parameters. A wide variety of filtering operations can be learned this way, including local spatial transformations, but also others like selective (de)blurring or adaptive feature extraction. Moreover, multiple such layers can be combined, e.g. in a recurrent architecture. We demonstrate the effectiveness of the dynamic filter network on the tasks of video and stereo prediction, and reach state-of-the-art performance on the moving MNIST dataset with a much smaller model. By visualizing the learned filters, we illustrate that the network has picked up flow information by only looking at unlabelled training data. This suggests that the network can be used to pretrain networks for various supervised tasks in an unsupervised way, like optical flow and depth estimation.", "bibtex": "@inproceedings{NIPS2016_8bf1211f,\n author = {Jia, Xu and De Brabandere, Bert and Tuytelaars, Tinne and Gool, Luc V},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dynamic Filter Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8bf1211fd4b7b94528899de0a43b9fb3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8bf1211fd4b7b94528899de0a43b9fb3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8bf1211fd4b7b94528899de0a43b9fb3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8bf1211fd4b7b94528899de0a43b9fb3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8bf1211fd4b7b94528899de0a43b9fb3-Reviews.html", "metareview": "", "pdf_size": 884083, "gs_citation": 1269, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6402271951989310264&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "ESAT-PSI, KU Leuven, iMinds; ESAT-PSI, KU Leuven, iMinds; ESAT-PSI, KU Leuven, iMinds; ESAT-PSI, KU Leuven, iMinds+D-ITET, ETH Zurich", "aff_domain": "esat.kuleuven.be;esat.kuleuven.be;esat.kuleuven.be;vision.ee.ethz.ch", "email": "esat.kuleuven.be;esat.kuleuven.be;esat.kuleuven.be;vision.ee.ethz.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8bf1211fd4b7b94528899de0a43b9fb3-Abstract.html", "aff_unique_index": "0;0;0;0+1", "aff_unique_norm": "KU Leuven;ETH Zurich", "aff_unique_dep": "ESAT-PSI;D-ITET", "aff_unique_url": "https://www.kuleuven.be;https://www.ethz.ch", "aff_unique_abbr": "KU Leuven;ETHZ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0;0+1", "aff_country_unique": "Belgium;Switzerland" }, { "title": "Dynamic Mode Decomposition with Reproducing Kernels for Koopman Spectral Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6969", "id": "6969", "author": "Yoshinobu Kawahara", "abstract": "A spectral analysis of the Koopman operator, which is an infinite dimensional linear operator on an observable, gives a (modal) description of the global behavior of a nonlinear dynamical system without any explicit prior knowledge of its governing equations. In this paper, we consider a spectral analysis of the Koopman operator in a reproducing kernel Hilbert space (RKHS). We propose a modal decomposition algorithm to perform the analysis using finite-length data sequences generated from a nonlinear system. The algorithm is in essence reduced to the calculation of a set of orthogonal bases for the Krylov matrix in RKHS and the eigendecomposition of the projection of the Koopman operator onto the subspace spanned by the bases. The algorithm returns a decomposition of the dynamics into a finite number of modes, and thus it can be thought of as a feature extraction procedure for a nonlinear dynamical system. Therefore, we further consider applications in machine learning using extracted features with the presented analysis. We illustrate the method on the applications using synthetic and real-world data.", "bibtex": "@inproceedings{NIPS2016_1728efbd,\n author = {Kawahara, Yoshinobu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dynamic Mode Decomposition with Reproducing Kernels for Koopman Spectral Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1728efbda81692282ba642aafd57be3a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1728efbda81692282ba642aafd57be3a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1728efbda81692282ba642aafd57be3a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1728efbda81692282ba642aafd57be3a-Reviews.html", "metareview": "", "pdf_size": 345082, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5921954004568692289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1728efbda81692282ba642aafd57be3a-Abstract.html" }, { "title": "Dynamic Network Surgery for Efficient DNNs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7374", "id": "7374", "author_site": "Yiwen Guo, Anbang Yao, Yurong Chen", "author": "Yiwen Guo; Anbang Yao; Yurong Chen", "abstract": "Deep learning has become a ubiquitous technology to improve machine intelligence. However, most of the existing deep models are structurally very complex, making them difficult to deploy on mobile platforms with limited computational power. In this paper, we propose a novel network compression method called dynamic network surgery, which can remarkably reduce the network complexity by performing on-the-fly connection pruning. Unlike the previous methods which accomplish this task in a greedy way, we properly incorporate connection splicing into the whole process to avoid incorrect pruning and make it a continual network maintenance process. The effectiveness of our method is demonstrated by experiments. Without any accuracy loss, our method can efficiently compress the number of parameters in LeNet-5 and AlexNet by a factor of $\\bm{108}\\times$ and $\\bm{17.7}\\times$ respectively, showing that it outperforms the recent pruning method by considerable margins. Code and some models are available at https://github.com/yiwenguo/Dynamic-Network-Surgery.", "bibtex": "@inproceedings{NIPS2016_2823f479,\n author = {Guo, Yiwen and Yao, Anbang and Chen, Yurong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dynamic Network Surgery for Efficient DNNs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2823f4797102ce1a1aec05359cc16dd9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2823f4797102ce1a1aec05359cc16dd9-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2823f4797102ce1a1aec05359cc16dd9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2823f4797102ce1a1aec05359cc16dd9-Reviews.html", "metareview": "", "pdf_size": 1521886, "gs_citation": 1405, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8401919167089401684&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Intel Labs China; Intel Labs China; Intel Labs China", "aff_domain": "intel.com;intel.com;intel.com", "email": "intel.com;intel.com;intel.com", "github": "https://github.com/yiwenguo/Dynamic-Network-Surgery", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2823f4797102ce1a1aec05359cc16dd9-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Intel", "aff_unique_dep": "Intel Labs", "aff_unique_url": "https://www.intel.cn", "aff_unique_abbr": "Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Dynamic matrix recovery from incomplete observations under an exact low-rank constraint", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7339", "id": "7339", "author_site": "Liangbei Xu, Mark Davenport", "author": "Liangbei Xu; Mark Davenport", "abstract": "Low-rank matrix factorizations arise in a wide variety of applications -- including recommendation systems, topic models, and source separation, to name just a few. In these and many other applications, it has been widely noted that by incorporating temporal information and allowing for the possibility of time-varying models, significant improvements are possible in practice. However, despite the reported superior empirical performance of these dynamic models over their static counterparts, there is limited theoretical justification for introducing these more complex models. In this paper we aim to address this gap by studying the problem of recovering a dynamically evolving low-rank matrix from incomplete observations. First, we propose the locally weighted matrix smoothing (LOWEMS) framework as one possible approach to dynamic matrix recovery. We then establish error bounds for LOWEMS in both the {\\em matrix sensing} and {\\em matrix completion} observation models. Our results quantify the potential benefits of exploiting dynamic constraints both in terms of recovery accuracy and sample complexity. To illustrate these benefits we provide both synthetic and real-world experimental results.", "bibtex": "@inproceedings{NIPS2016_6449f44a,\n author = {Xu, Liangbei and Davenport, Mark},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Dynamic matrix recovery from incomplete observations under an exact low-rank constraint},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6449f44a102fde848669bdd9eb6b76fa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6449f44a102fde848669bdd9eb6b76fa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6449f44a102fde848669bdd9eb6b76fa-Reviews.html", "metareview": "", "pdf_size": 301518, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10355679384430891529&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Electrical and Computer Engineering, Georgia Institute of Technology; Department of Electrical and Computer Engineering, Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu", "email": "gatech.edu;gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Edge-exchangeable graphs and sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7022", "id": "7022", "author_site": "Diana Cai, Trevor Campbell, Tamara Broderick", "author": "Diana Cai; Trevor Campbell; Tamara Broderick", "abstract": "Many popular network models rely on the assumption of (vertex) exchangeability, in which the distribution of the graph is invariant to relabelings of the vertices. However, the Aldous-Hoover theorem guarantees that these graphs are dense or empty with probability one, whereas many real-world graphs are sparse. We present an alternative notion of exchangeability for random graphs, which we call edge exchangeability, in which the distribution of a graph sequence is invariant to the order of the edges. We demonstrate that edge-exchangeable models, unlike models that are traditionally vertex exchangeable, can exhibit sparsity. To do so, we outline a general framework for graph generative models; by contrast to the pioneering work of Caron and Fox (2015), models within our framework are stationary across steps of the graph sequence. In particular, our model grows the graph by instantiating more latent atoms of a single random measure as the dataset size increases, rather than adding new atoms to the measure.", "bibtex": "@inproceedings{NIPS2016_1a0a283b,\n author = {Cai, Diana and Campbell, Trevor and Broderick, Tamara},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Edge-exchangeable graphs and sparsity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1a0a283bfe7c549dee6c638a05200e32-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1a0a283bfe7c549dee6c638a05200e32-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1a0a283bfe7c549dee6c638a05200e32-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1a0a283bfe7c549dee6c638a05200e32-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1a0a283bfe7c549dee6c638a05200e32-Reviews.html", "metareview": "", "pdf_size": 556486, "gs_citation": 100, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5668643472538668965&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Dept. of Statistics, U. Chicago; CSAIL, MIT; CSAIL, MIT", "aff_domain": "uchicago.edu;mit.edu;csail.mit.edu", "email": "uchicago.edu;mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1a0a283bfe7c549dee6c638a05200e32-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Chicago;Massachusetts Institute of Technology", "aff_unique_dep": "Dept. of Statistics;Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.uchicago.edu;https://www.csail.mit.edu", "aff_unique_abbr": "UChicago;MIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Globally Convergent Stochastic Optimization for Canonical Correlation Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7344", "id": "7344", "author_site": "Weiran Wang, Jialei Wang, Dan Garber, Dan Garber, Nati Srebro", "author": "Weiran Wang; Jialei Wang; Dan Garber; Dan Garber; Nati Srebro", "abstract": "We study the stochastic optimization of canonical correlation analysis (CCA), whose objective is nonconvex and does not decouple over training samples. Although several stochastic gradient based optimization algorithms have been recently proposed to solve this problem, no global convergence guarantee was provided by any of them. Inspired by the alternating least squares/power iterations formulation of CCA, and the shift-and-invert preconditioning method for PCA, we propose two globally convergent meta-algorithms for CCA, both of which transform the original problem into sequences of least squares problems that need only be solved approximately. We instantiate the meta-algorithms with state-of-the-art SGD methods and obtain time complexities that significantly improve upon that of previous work. Experimental results demonstrate their superior performance.", "bibtex": "@inproceedings{NIPS2016_42998cf3,\n author = {Wang, Weiran and Wang, Jialei and Garber, Dan and Garber, Dan and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Globally Convergent Stochastic Optimization for Canonical Correlation Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/42998cf32d552343bc8e460416382dca-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/42998cf32d552343bc8e460416382dca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/42998cf32d552343bc8e460416382dca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/42998cf32d552343bc8e460416382dca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/42998cf32d552343bc8e460416382dca-Reviews.html", "metareview": "", "pdf_size": 250269, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4694092654802301398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/42998cf32d552343bc8e460416382dca-Abstract.html" }, { "title": "Efficient High-Order Interaction-Aware Feature Selection Based on Conditional Mutual Information", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6987", "id": "6987", "author_site": "Alexander Shishkin, Anastasia Bezzubtseva, Alexey Drutsa, Ilia Shishkov, Ekaterina Gladkikh, Gleb Gusev, Pavel Serdyukov", "author": "Alexander Shishkin; Anastasia Bezzubtseva; Alexey Drutsa; Ilia Shishkov; Ekaterina Gladkikh; Gleb Gusev; Pavel Serdyukov", "abstract": "This study introduces a novel feature selection approach CMICOT, which is a further evolution of filter methods with sequential forward selection (SFS) whose scoring functions are based on conditional mutual information (MI). We state and study a novel saddle point (max-min) optimization problem to build a scoring function that is able to identify joint interactions between several features. This method fills the gap in MI-based SFS techniques with respect to high-order dependencies. In this high-dimensional case, the estimation of MI has prohibitively high sample complexity. We mitigate this cost using a greedy approximation and binary representatives, which makes our technique effective in practice. The superiority of our approach is demonstrated by comparison with recently proposed interaction-aware filters and several interaction-agnostic state-of-the-art ones on ten publicly available benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_d5e2fbef,\n author = {Shishkin, Alexander and Bezzubtseva, Anastasia and Drutsa, Alexey and Shishkov, Ilia and Gladkikh, Ekaterina and Gusev, Gleb and Serdyukov, Pavel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient High-Order Interaction-Aware Feature Selection Based on Conditional Mutual Information},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d5e2fbef30a4eb668a203060ec8e5eef-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d5e2fbef30a4eb668a203060ec8e5eef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d5e2fbef30a4eb668a203060ec8e5eef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d5e2fbef30a4eb668a203060ec8e5eef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d5e2fbef30a4eb668a203060ec8e5eef-Reviews.html", "metareview": "", "pdf_size": 596182, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2601884283722025753&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Yandex; Yandex; Yandex; Yandex; Yandex; Yandex; Yandex", "aff_domain": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru", "email": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d5e2fbef30a4eb668a203060ec8e5eef-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Yandex", "aff_unique_dep": "", "aff_unique_url": "https://yandex.com", "aff_unique_abbr": "Yandex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Russian Federation" }, { "title": "Efficient Neural Codes under Metabolic Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7201", "id": "7201", "author_site": "Zhuo Wang, Xue-Xin Wei, Alan A Stocker, Daniel Lee", "author": "Zhuo Wang; Xue-Xin Wei; Alan Stocker; Daniel D Lee", "abstract": "Neural codes are inevitably shaped by various kinds of biological constraints, \\emph{e.g.} noise and metabolic cost. Here we formulate a coding framework which explicitly deals with noise and the metabolic costs associated with the neural representation of information, and analytically derive the optimal neural code for monotonic response functions and arbitrary stimulus distributions. For a single neuron, the theory predicts a family of optimal response functions depending on the metabolic budget and noise characteristics. Interestingly, the well-known histogram equalization solution can be viewed as a special case when metabolic resources are unlimited. For a pair of neurons, our theory suggests that under more severe metabolic constraints, ON-OFF coding is an increasingly more efficient coding scheme compared to ON-ON or OFF-OFF. The advantage could be as large as one-fold, substantially larger than the previous estimation. Some of these predictions could be generalized to the case of large neural populations. In particular, these analytical results may provide a theoretical basis for the predominant segregation into ON- and OFF-cells in early visual processing areas. 
Overall, we provide a unified framework for optimal neural codes with monotonic tuning curves in the brain, and make predictions that can be directly tested with physiology experiments.", "bibtex": "@inproceedings{NIPS2016_e6c2dc3d,\n author = {Wang, Zhuo and Wei, Xue-Xin and Stocker, Alan A and Lee, Daniel D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Neural Codes under Metabolic Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e6c2dc3dee4a51dcec3a876aa2339a78-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e6c2dc3dee4a51dcec3a876aa2339a78-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e6c2dc3dee4a51dcec3a876aa2339a78-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e6c2dc3dee4a51dcec3a876aa2339a78-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e6c2dc3dee4a51dcec3a876aa2339a78-Reviews.html", "metareview": "", "pdf_size": 466840, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11976037075804786054&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Mathematics, University of Pennsylvania + Center for Neural Science, New York University; Department of Psychology, University of Pennsylvania + Department of Statistics and Center for Theoretical Neuroscience, Columbia University; Department of Psychology, University of Pennsylvania; Department of Electrical and System Engineering, University of Pennsylvania", "aff_domain": "nyu.edu;gmail.com;sas.upenn.edu;seas.upenn.edu", "email": "nyu.edu;gmail.com;sas.upenn.edu;seas.upenn.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e6c2dc3dee4a51dcec3a876aa2339a78-Abstract.html", "aff_unique_index": "0+1;0+2;0;0", "aff_unique_norm": "University of Pennsylvania;New York University;Columbia University", "aff_unique_dep": "Department of Mathematics;Center for Neural Science;Department of Statistics", "aff_unique_url": "https://www.upenn.edu;https://www.nyu.edu;https://www.columbia.edu", "aff_unique_abbr": "UPenn;NYU;Columbia", "aff_campus_unique_index": "1;", "aff_campus_unique": ";New York", "aff_country_unique_index": "0+0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Nonparametric Smoothness Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8500", "id": "8500", "author_site": "Shashank Singh, Simon Du, Barnabas Poczos", "author": "Shashank Singh; Simon S Du; Barnabas Poczos", "abstract": "Sobolev quantities (norms, inner products, and distances) of probability density functions are important in the theory of nonparametric statistics, but have rarely been used in practice, partly due to a lack of practical estimators. They also include, as special cases, L^2 quantities which are used in many applications. We propose and analyze a family of estimators for Sobolev quantities of unknown probability density functions. We bound the finite-sample bias and variance of our estimators, finding that they are generally minimax rate-optimal. 
Our estimators are significantly more computationally tractable than previous estimators, and exhibit a statistical/computational trade-off allowing them to adapt to computational constraints. We also draw theoretical connections to recent work on fast two-sample testing and empirically validate our estimators on synthetic data.", "bibtex": "@inproceedings{NIPS2016_acc3e040,\n author = {Singh, Shashank and Du, Simon S and Poczos, Barnabas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Nonparametric Smoothness Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/acc3e0404646c57502b480dc052c4fe1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/acc3e0404646c57502b480dc052c4fe1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/acc3e0404646c57502b480dc052c4fe1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/acc3e0404646c57502b480dc052c4fe1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/acc3e0404646c57502b480dc052c4fe1-Reviews.html", "metareview": "", "pdf_size": 444418, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10980164792080024378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "andrew.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/acc3e0404646c57502b480dc052c4fe1-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Efficient Second Order Online Learning by Sketching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7050", "id": "7050", "author_site": "Haipeng Luo, Alekh Agarwal, Nicol\u00f2 Cesa-Bianchi, John Langford", "author": "Haipeng Luo; Alekh Agarwal; Nicol\u00f2 Cesa-Bianchi; John Langford", "abstract": "We propose Sketched Online Newton (SON), an online second order learning algorithm that enjoys substantially improved regret guarantees for ill-conditioned data. SON is an enhanced version of the Online Newton Step, which, via sketching techniques, enjoys a running time linear in the dimension and sketch size. We further develop sparse forms of the sketching methods (such as Oja's rule), making the computation linear in the sparsity of features. Together, these improvements eliminate all computational obstacles in previous second order online learning approaches.", "bibtex": "@inproceedings{NIPS2016_15de21c6,\n author = {Luo, Haipeng and Agarwal, Alekh and Cesa-Bianchi, Nicol\\`{o} and Langford, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient Second Order Online Learning by Sketching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/15de21c670ae7c3f6f3f1f37029303c9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/15de21c670ae7c3f6f3f1f37029303c9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/15de21c670ae7c3f6f3f1f37029303c9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/15de21c670ae7c3f6f3f1f37029303c9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/15de21c670ae7c3f6f3f1f37029303c9-Reviews.html", "metareview": "", "pdf_size": 387924, "gs_citation": 119, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9032290102898611673&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Princeton University, Princeton, NJ USA; Microsoft Research, New York, NY USA; Universit\u00e0 degli Studi di Milano, Italy; Microsoft Research, New York, NY USA", "aff_domain": "cs.princeton.edu;microsoft.com;unimi.it;microsoft.com", "email": "cs.princeton.edu;microsoft.com;unimi.it;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/15de21c670ae7c3f6f3f1f37029303c9-Abstract.html", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Princeton University;Microsoft;Universit\u00e0 degli Studi di Milano", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "https://www.princeton.edu;https://www.microsoft.com/en-us/research;https://www.unimi.it", "aff_unique_abbr": "Princeton;MSR;UniMi", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Princeton;New York;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Italy" }, { "title": "Efficient and Robust Spiking Neural Circuit for Navigation Inspired by Echolocating Bats", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7016", "id": "7016", "author_site": "Bipin Rajendran, Pulkit Tandon, Yash H Malviya", "author": "Pulkit Tandon; Yash H Malviya; Bipin Rajendran", "abstract": "We demonstrate a spiking neural circuit for azimuth angle detection inspired by the echolocation circuits of the Horseshoe bat Rhinolophus ferrumequinum and utilize it to devise a model for navigation and target tracking, capturing several key aspects of information transmission in biology. Our network, using only a simple local-information based sensor implementing the cardioid angular gain function, operates at a biological spike rate of 10 Hz. The network tracks large angular targets (60 degrees) within 1 sec with a 10% RMS error. We study the navigational ability of our model for foraging and target localization tasks in a forest of obstacles and show that our network requires less than 200X spike-triggered decisions, while suffering only a 1% loss in performance compared to a proportional-integral-derivative controller, in the presence of 50% additive noise. Superior performance can be obtained at higher average spike rates of 100 Hz and 1000 Hz, but even the accelerated networks require 20X and 10X fewer decisions respectively, demonstrating the superior computational efficiency of bio-inspired information processing systems.", "bibtex": "@inproceedings{NIPS2016_a86c450b,\n author = {Tandon, Pulkit and Malviya, Yash H and Rajendran, Bipin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient and Robust Spiking Neural Circuit for Navigation Inspired by Echolocating Bats},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a86c450b76fb8c371afead6410d55534-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a86c450b76fb8c371afead6410d55534-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a86c450b76fb8c371afead6410d55534-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a86c450b76fb8c371afead6410d55534-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a86c450b76fb8c371afead6410d55534-Reviews.html", "metareview": "", "pdf_size": 1869914, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9711086021085597515&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Indian Institute of Technology, Bombay; Indian Institute of Technology, Bombay; New Jersey Institute of Technology", "aff_domain": "gmail.com;gmail.com;njit.edu", "email": "gmail.com;gmail.com;njit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a86c450b76fb8c371afead6410d55534-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Indian Institute of Technology Bombay;New Jersey Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitb.ac.in;https://www.njit.edu", "aff_unique_abbr": "IIT Bombay;NJIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "India;United States" }, { "title": "Efficient state-space modularization for planning: theory, behavioral and neural signatures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7123", "id": "7123", "author_site": "Daniel McNamee, Daniel M Wolpert, Mate Lengyel", "author": "Daniel McNamee; Daniel M. Wolpert; Mate Lengyel", "abstract": "Even in state-spaces of modest size, planning is plagued by the \u201ccurse of dimensionality\u201d. This problem is particularly acute in human and animal cognition given the limited capacity of working memory, and the time pressures under which planning often occurs in the natural environment. Hierarchically organized modular representations have long been suggested to underlie the capacity of biological systems to efficiently and flexibly plan in complex environments. However, the principles underlying efficient modularization remain obscure, making it difficult to identify its behavioral and neural signatures. Here, we develop a normative theory of efficient state-space representations which partitions an environment into distinct modules by minimizing the average (information theoretic) description length of planning within the environment, thereby optimally trading off the complexity of planning across and within modules. We show that such optimal representations provide a unifying account for a diverse range of hitherto unrelated phenomena at multiple levels of behavior and neural representation.", "bibtex": "@inproceedings{NIPS2016_10907813,\n author = {McNamee, Daniel and Wolpert, Daniel M and Lengyel, Mate},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Efficient state-space modularization for planning: theory, behavioral and neural signatures},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/10907813b97e249163587e6246612e21-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/10907813b97e249163587e6246612e21-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/10907813b97e249163587e6246612e21-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/10907813b97e249163587e6246612e21-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/10907813b97e249163587e6246612e21-Reviews.html", "metareview": "", "pdf_size": 4038499, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12443152625556595667&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Computational and Biological Learning Lab, Department of Engineering, University of Cambridge; Computational and Biological Learning Lab, Department of Engineering, University of Cambridge; Computational and Biological Learning Lab, Department of Engineering, University of Cambridge", "aff_domain": "eng.cam.ac.uk;eng.cam.ac.uk;eng.cam.ac.uk", "email": "eng.cam.ac.uk;eng.cam.ac.uk;eng.cam.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/10907813b97e249163587e6246612e21-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "Department of Engineering", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Eliciting Categorical Data for Optimal Aggregation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7144", "id": "7144", "author_site": "Chien-Ju Ho, Rafael Frongillo, Yiling Chen", "author": "Chien-Ju Ho; Rafael Frongillo; Yiling Chen", "abstract": "Models for collecting and aggregating categorical data on crowdsourcing platforms typically fall into two broad categories: those assuming agents are honest and consistent but have heterogeneous error rates, and those assuming agents are strategic and seek to maximize their expected reward. The former often leads to tractable aggregation of elicited data, while the latter usually focuses on optimal elicitation and does not consider aggregation. In this paper, we develop a Bayesian model, wherein agents have differing quality of information, but also respond to incentives. Our model generalizes both categories and enables the joint exploration of optimal elicitation and aggregation. This model enables our exploration, both analytically and experimentally, of optimal aggregation of categorical data and optimal multiple-choice interface design.", "bibtex": "@inproceedings{NIPS2016_018b59ce,\n author = {Ho, Chien-Ju and Frongillo, Rafael and Chen, Yiling},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Eliciting Categorical Data for Optimal Aggregation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/018b59ce1fd616d874afad0f44ba338d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/018b59ce1fd616d874afad0f44ba338d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/018b59ce1fd616d874afad0f44ba338d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/018b59ce1fd616d874afad0f44ba338d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/018b59ce1fd616d874afad0f44ba338d-Reviews.html", "metareview": "", "pdf_size": 2069093, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1021976291989033513&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Cornell University; CU Boulder; Harvard University", "aff_domain": "cornell.edu;colorado.edu;seas.harvard.edu", "email": "cornell.edu;colorado.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/018b59ce1fd616d874afad0f44ba338d-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Cornell University;University of Colorado Boulder;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.colorado.edu;https://www.harvard.edu", "aff_unique_abbr": "Cornell;CU Boulder;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boulder", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "End-to-End Goal-Driven Web Navigation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7283", "id": "7283", "author_site": "Rodrigo Nogueira, Kyunghyun Cho", "author": "Rodrigo Nogueira; Kyunghyun Cho", "abstract": "We propose goal-driven web navigation as a benchmark task for evaluating an agent's ability to understand natural language and plan in partially observed environments. In this challenging task, an agent navigates through a website, which is represented as a graph consisting of web pages as nodes and hyperlinks as directed edges, to find a web page in which a query appears. The agent is required to have sophisticated high-level reasoning based on natural language and efficient sequential decision-making capability to succeed. We release a software tool, called WebNav, that automatically transforms a website into this goal-driven web navigation task, and as an example, we build WikiNav, a dataset constructed from the English Wikipedia. We extensively evaluate different variants of neural net based artificial agents on WikiNav and observe that the proposed goal-driven web navigation well reflects the advances in models, making it a suitable benchmark for evaluating future progress. Furthermore, we extend WikiNav with question-answer pairs from Jeopardy! and test the proposed agent based on recurrent neural networks against strong inverted-index-based search engines. 
The artificial agents trained on WikiNav outperform the engine-based approaches, demonstrating the capability of the proposed goal-driven navigation as a good proxy for measuring progress in real-world tasks such as focused crawling and question-answering.", "bibtex": "@inproceedings{NIPS2016_1579779b,\n author = {Nogueira, Rodrigo and Cho, Kyunghyun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {End-to-End Goal-Driven Web Navigation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1579779b98ce9edb98dd85606f2c119d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1579779b98ce9edb98dd85606f2c119d-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1579779b98ce9edb98dd85606f2c119d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1579779b98ce9edb98dd85606f2c119d-Reviews.html", "metareview": "", "pdf_size": 477062, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6990148551323397938&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Tandon School of Engineering, New York University; Courant Institute of Mathematical Sciences, New York University", "aff_domain": "nyu.edu;nyu.edu", "email": "nyu.edu;nyu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1579779b98ce9edb98dd85606f2c119d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "Tandon School of Engineering", "aff_unique_url": "https://engineering.nyu.edu", "aff_unique_abbr": "NYU Tandon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "End-to-End Kernel Learning with Supervised Convolutional Kernel Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7400", "id": "7400", "author": "Julien Mairal", "abstract": "In this paper, we introduce a new image representation based on a multilayer kernel machine. Unlike traditional kernel methods where data representation is decoupled from the prediction task, we learn how to shape the kernel with supervision. We proceed by first proposing improvements of the recently-introduced convolutional kernel networks (CKNs) in the context of unsupervised learning; then, we derive backpropagation rules to take advantage of labeled training data. The resulting model is a new type of convolutional neural network, where optimizing the filters at each layer is equivalent to learning a linear subspace in a reproducing kernel Hilbert space (RKHS). We show that our method achieves reasonably competitive performance for image classification on some standard ``deep learning'' datasets such as CIFAR-10 and SVHN, and also for image super-resolution, demonstrating the applicability of our approach to a large variety of image-related tasks.", "bibtex": "@inproceedings{NIPS2016_fc8001f8,\n author = {Mairal, Julien},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {End-to-End Kernel Learning with Supervised Convolutional Kernel Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fc8001f834f6a5f0561080d134d53d29-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fc8001f834f6a5f0561080d134d53d29-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fc8001f834f6a5f0561080d134d53d29-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fc8001f834f6a5f0561080d134d53d29-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fc8001f834f6a5f0561080d134d53d29-Reviews.html", "metareview": "", "pdf_size": 350572, "gs_citation": 158, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5841670465764385695&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "Inria", "aff_domain": "inria.fr", "email": "inria.fr", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fc8001f834f6a5f0561080d134d53d29-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "Inria", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Equality of Opportunity in Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6907", "id": "6907", "author_site": "Moritz Hardt, Eric Price, Eric Price, Nati Srebro", "author": "Moritz Hardt; Eric Price; ecprice; Nati Srebro", "abstract": "We propose a criterion for discrimination against a specified sensitive attribute in supervised learning, where the goal is to predict some target based on available features. Assuming data about the predictor, target, and membership in the protected group are available, we show how to optimally adjust any learned predictor so as to remove discrimination according to our definition. Our framework also improves incentives by shifting the cost of poor classification from disadvantaged groups to the decision maker, who can respond by improving the classification accuracy.", "bibtex": "@inproceedings{NIPS2016_9d268236,\n author = {Hardt, Moritz and Price, Eric and Price, Eric and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Equality of Opportunity in Supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Reviews.html", "metareview": "", "pdf_size": 743238, "gs_citation": 5788, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2062984936384963570&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9d2682367c3935defcb1f9e247a97c0d-Abstract.html" }, { "title": "Error Analysis of Generalized Nystr\u00f6m Kernel Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7291", "id": "7291", "author_site": "Hong Chen, Haifeng Xia, Heng Huang, Weidong Cai", "author": "Hong Chen; Haifeng Xia; Heng Huang; Weidong Cai", "abstract": "The Nystr\\\"{o}m method has been used successfully to improve the computational efficiency of kernel ridge regression (KRR). Recently, theoretical analysis of Nystr\\\"{o}m KRR, including generalization bounds and convergence rates, has been established based on the reproducing kernel Hilbert space (RKHS) associated with a symmetric positive semi-definite kernel. However, in real-world applications, an RKHS is not always optimal and the kernel function need not be symmetric or positive semi-definite. In this paper, we consider the generalized Nystr\\\"{o}m kernel regression (GNKR) with $\\ell_2$ coefficient regularization, where the kernel is only required to be continuous and bounded. Error analysis is provided to characterize its generalization performance and the column norm sampling is introduced to construct the refined hypothesis space. In particular, a fast learning rate with polynomial decay is achieved for the GNKR. Experimental analysis demonstrates the satisfactory performance of GNKR with the column norm sampling.", "bibtex": "@inproceedings{NIPS2016_bb7946e7,\n author = {Chen, Hong and Xia, Haifeng and Huang, Heng and Cai, Weidong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Error Analysis of Generalized Nystr\\\"{o}m Kernel Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bb7946e7d85c81a9e69fee1cea4a087c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bb7946e7d85c81a9e69fee1cea4a087c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bb7946e7d85c81a9e69fee1cea4a087c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bb7946e7d85c81a9e69fee1cea4a087c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bb7946e7d85c81a9e69fee1cea4a087c-Reviews.html", "metareview": "", "pdf_size": 340112, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3031682850251298229&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Computer Science and Engineering, University of Texas at Arlington; Mathematics and Statistics, Huazhong Agricultural University; School of Information Technologies, University of Sydney; Computer Science and Engineering, University of Texas at Arlington", "aff_domain": "mail.hzau.edu.cn;gmail.com;sydney.edu.au;uta.edu", "email": "mail.hzau.edu.cn;gmail.com;sydney.edu.au;uta.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bb7946e7d85c81a9e69fee1cea4a087c-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Texas at Arlington;Huazhong Agricultural University;University of Sydney", "aff_unique_dep": "Computer Science and Engineering;Mathematics and Statistics;School of Information Technologies", "aff_unique_url": "https://www.uta.edu;http://www.hzau.edu.cn/;https://www.sydney.edu.au", "aff_unique_abbr": "UTA;HZAU;USYD", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Arlington;;Sydney", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;China;Australia" }, { "title": "Estimating Nonlinear Neural Response Functions using GP Priors and Kronecker Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6895", "id": "6895", "author_site": "Cristina Savin, Gasper Tkacik", "author": "Cristina Savin; Gasper Tkacik", "abstract": "Jointly characterizing neural responses in terms of several external variables promises novel insights into circuit function, but remains computationally prohibitive in practice. Here we use Gaussian process (GP) priors and exploit recent advances in fast GP inference and learning based on Kronecker methods, to efficiently estimate multidimensional nonlinear tuning functions. Our estimators require considerably less data than traditional methods and further provide principled uncertainty estimates. We apply these tools to hippocampal recordings during open field exploration and use them to characterize the joint dependence of CA1 responses on the position of the animal and several other variables, including the animal's speed, direction of motion, and network oscillations. Our results provide an unprecedentedly detailed quantification of the tuning of hippocampal neurons. The model's generality suggests that our approach can be used to estimate neural response properties in other brain regions.", "bibtex": "@inproceedings{NIPS2016_8d9fc230,\n author = {Savin, Cristina and Tkacik, Gasper},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Estimating Nonlinear Neural Response Functions using GP Priors and Kronecker Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8d9fc2308c8f28d2a7d2f6f48801c705-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8d9fc2308c8f28d2a7d2f6f48801c705-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8d9fc2308c8f28d2a7d2f6f48801c705-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8d9fc2308c8f28d2a7d2f6f48801c705-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8d9fc2308c8f28d2a7d2f6f48801c705-Reviews.html", "metareview": "", "pdf_size": 8873614, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11524423100145271535&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "IST Austria; IST Austria", "aff_domain": "ist.ac.at;ist.ac.at", "email": "ist.ac.at;ist.ac.at", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8d9fc2308c8f28d2a7d2f6f48801c705-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Institute of Science and Technology Austria", "aff_unique_dep": "", "aff_unique_url": "https://www.ist.ac.at", "aff_unique_abbr": "IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Austria" }, { "title": "Estimating the Size of a Large Network and its Communities from a Random Sample", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7364", "id": "7364", "author_site": "Lin Chen, Amin Karbasi, Forrest W. Crawford", "author": "Lin Chen; Amin Karbasi; Forrest W. Crawford", "abstract": "Most real-world networks are too large to be measured or studied directly and there is substantial interest in estimating global network properties from smaller sub-samples. One of the most important global properties is the number of vertices/nodes in the network. Estimating the number of vertices in a large network is a major challenge in computer science, epidemiology, demography, and intelligence analysis. In this paper we consider a population random graph G = (V, E) from the stochastic block model (SBM) with K communities/blocks. A sample is obtained by randomly choosing a subset W and letting G(W) be the induced subgraph in G of the vertices in W. In addition to G(W), we observe the total degree of each sampled vertex and its block membership. Given this partial information, we propose an efficient PopULation Size Estimation algorithm, called PULSE, that accurately estimates the size of the whole population as well as the size of each community. To support our theoretical analysis, we perform an exhaustive set of experiments to study the effects of sample size, K, and SBM model parameters on the accuracy of the estimates. The experimental results also demonstrate that PULSE significantly outperforms a widely-used method called the network scale-up estimator in a wide variety of scenarios.", "bibtex": "@inproceedings{NIPS2016_8c00dee2,\n author = {Chen, Lin and Karbasi, Amin and Crawford, Forrest W.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Estimating the Size of a Large Network and its Communities from a Random Sample},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8c00dee24c9878fea090ed070b44f1ab-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8c00dee24c9878fea090ed070b44f1ab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8c00dee24c9878fea090ed070b44f1ab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8c00dee24c9878fea090ed070b44f1ab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8c00dee24c9878fea090ed070b44f1ab-Reviews.html", "metareview": "", "pdf_size": 674925, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4965323516290972186&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Department of Electrical Engineering + Yale Institute for Network Science; Department of Electrical Engineering + Yale Institute for Network Science; Yale Institute for Network Science + Department of Biostatistics, Yale University", "aff_domain": "yale.edu;yale.edu;yale.edu", "email": "yale.edu;yale.edu;yale.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8c00dee24c9878fea090ed070b44f1ab-Abstract.html", "aff_unique_index": "0+1;0+1;1+1", "aff_unique_norm": "Institution not specified;Yale University", "aff_unique_dep": "Department of Electrical Engineering;Institute for Network Science", "aff_unique_url": ";https://www.yale.edu", "aff_unique_abbr": ";Yale", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1+1", "aff_country_unique": ";United States" }, { "title": "Estimating the class prior and posterior from noisy positives and unlabeled data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6938", "id": "6938", "author_site": "Shantanu Jain, Martha White, Predrag Radivojac", "author": "Shantanu Jain; Martha White; Predrag Radivojac", "abstract": "We develop a classification algorithm for estimating posterior distributions from positive-unlabeled data that is robust to noise in the positive labels and effective for high-dimensional data. In recent years, several algorithms have been proposed to learn from positive-unlabeled data; however, many of these contributions remain theoretical, performing poorly on real high-dimensional data that is typically contaminated with noise. We build on this previous work to develop two practical classification algorithms that explicitly model the noise in the positive labels and utilize univariate transforms built on discriminative classifiers. We prove that these univariate transforms preserve the class prior, enabling estimation in the univariate space and avoiding kernel density estimation for high-dimensional data. The theoretical development and parametric and nonparametric algorithms proposed here constitute an important step towards widespread use of robust classification algorithms for positive-unlabeled data.", "bibtex": "@inproceedings{NIPS2016_79a49b3e,\n author = {Jain, Shantanu and White, Martha and Radivojac, Predrag},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Estimating the class prior and posterior from noisy positives and unlabeled data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/79a49b3e3762632813f9e35f4ba53d6c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/79a49b3e3762632813f9e35f4ba53d6c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/79a49b3e3762632813f9e35f4ba53d6c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/79a49b3e3762632813f9e35f4ba53d6c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/79a49b3e3762632813f9e35f4ba53d6c-Reviews.html", "metareview": "", "pdf_size": 278891, "gs_citation": 145, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5262171199808422348&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, Indiana University, Bloomington, Indiana, USA; Department of Computer Science, Indiana University, Bloomington, Indiana, USA; Department of Computer Science, Indiana University, Bloomington, Indiana, USA", "aff_domain": "indiana.edu;indiana.edu;indiana.edu", "email": "indiana.edu;indiana.edu;indiana.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/79a49b3e3762632813f9e35f4ba53d6c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Indiana University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.indiana.edu", "aff_unique_abbr": "IU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Bloomington", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Exact Recovery of Hard Thresholding Pursuit", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7296", "id": "7296", "author_site": "Xiaotong Yuan, Ping Li, Tong Zhang", "author": "Xiaotong Yuan; Ping Li; Tong Zhang", "abstract": "The Hard Thresholding Pursuit (HTP) is a class of truncated gradient descent methods for finding sparse solutions of $\\ell_0$-constrained loss minimization problems. The HTP-style methods have been shown to have strong approximation guarantees and impressive numerical performance in high dimensional statistical learning applications. However, the theoretical treatment of these methods has traditionally been restricted to the analysis of parameter estimation consistency. It remains an open problem to analyze the support recovery performance (a.k.a. sparsistency) of this type of method for recovering the global minimizer of the original NP-hard problem. In this paper, we bridge this gap by showing, for the first time, that exact recovery of the global sparse minimizer is possible for HTP-style methods under conditions bounding the restricted strong condition number. We further show that HTP-style methods are able to recover the support of certain relaxed sparse solutions without assuming a bounded restricted strong condition number. Numerical results on simulated data confirm our theoretical predictions.", "bibtex": "@inproceedings{NIPS2016_e9b73bcc,\n author = {Yuan, Xiaotong and Li, Ping and Zhang, Tong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exact Recovery of Hard Thresholding Pursuit},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e9b73bccd1762555582b513ff9d02492-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e9b73bccd1762555582b513ff9d02492-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e9b73bccd1762555582b513ff9d02492-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e9b73bccd1762555582b513ff9d02492-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e9b73bccd1762555582b513ff9d02492-Reviews.html", "metareview": "", "pdf_size": 139730, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1358171096644941981&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e9b73bccd1762555582b513ff9d02492-Abstract.html" }, { "title": "Examples are not enough, learn to criticize! Criticism for Interpretability", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7428", "id": "7428", "author_site": "Been Kim, Sanmi Koyejo, Rajiv Khanna", "author": "Been Kim; Rajiv Khanna; Oluwasanmi O Koyejo", "abstract": "Example-based explanations are widely used in the effort to improve the interpretability of highly complex distributions. However, prototypes alone are rarely sufficient to represent the gist of the complexity. In order for users to construct better mental models and understand complex data distributions, we also need {\\em criticism} to explain what is \\textit{not} captured by prototypes. Motivated by the Bayesian model criticism framework, we develop \\texttt{MMD-critic} which efficiently learns prototypes and criticism, designed to aid human interpretability. A human subject pilot study shows that the \\texttt{MMD-critic} selects prototypes and criticism that are useful to facilitate human understanding and reasoning. We also evaluate the prototypes selected by \\texttt{MMD-critic} via a nearest prototype classifier, showing competitive performance compared to baselines.", "bibtex": "@inproceedings{NIPS2016_5680522b,\n author = {Kim, Been and Khanna, Rajiv and Koyejo, Oluwasanmi O},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Examples are not enough, learn to criticize! 
Criticism for Interpretability},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5680522b8e2bb01943234bce7bf84534-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5680522b8e2bb01943234bce7bf84534-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5680522b8e2bb01943234bce7bf84534-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5680522b8e2bb01943234bce7bf84534-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5680522b8e2bb01943234bce7bf84534-Reviews.html", "metareview": "", "pdf_size": 2087486, "gs_citation": 1228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15209592161416514572&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Allen Institute for AI; UT Austin; UIUC", "aff_domain": "csail.mit.edu;utexas.edu;illinois.edu", "email": "csail.mit.edu;utexas.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5680522b8e2bb01943234bce7bf84534-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Allen Institute for AI;University of Texas at Austin;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://www.utexas.edu;https://www.illinois.edu", "aff_unique_abbr": "AI2;UT Austin;UIUC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Austin;Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Exploiting Tradeoffs for Exact Recovery in Heterogeneous Stochastic Block Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6913", "id": "6913", "author_site": "Amin Jalali, Qiyang Han, Ioana Dumitriu, Maryam Fazel", "author": "Amin Jalali; Qiyang Han; Ioana Dumitriu; Maryam Fazel", "abstract": "The Stochastic Block Model (SBM) is a widely used random graph model for networks with communities. Despite the recent burst of interest in community detection under the SBM from statistical and computational points of view, there are still gaps in understanding the fundamental limits of recovery. In this paper, we consider the SBM in its full generality, where there is no restriction on the number and sizes of communities or how they grow with the number of nodes, as well as on the connectivity probabilities inside or across communities. For such stochastic block models, we provide guarantees for exact recovery via a semidefinite program as well as upper and lower bounds on SBM parameters for exact recoverability. Our results exploit the tradeoffs among the various parameters of the heterogeneous SBM and provide recovery guarantees for many new interesting SBM configurations.", "bibtex": "@inproceedings{NIPS2016_57bafb2c,\n author = {Jalali, Amin and Han, Qiyang and Dumitriu, Ioana and Fazel, Maryam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exploiting Tradeoffs for Exact Recovery in Heterogeneous Stochastic Block Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/57bafb2c2dfeefba931bb03a835b1fa9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/57bafb2c2dfeefba931bb03a835b1fa9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/57bafb2c2dfeefba931bb03a835b1fa9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/57bafb2c2dfeefba931bb03a835b1fa9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/57bafb2c2dfeefba931bb03a835b1fa9-Reviews.html", "metareview": "", "pdf_size": 179631, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7677002549187585663&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Electrical Engineering, University of Washington; Department of Statistics, University of Washington; Department of Mathematics, University of Washington; Department of Electrical Engineering, University of Washington", "aff_domain": "uw.edu;uw.edu;uw.edu;uw.edu", "email": "uw.edu;uw.edu;uw.edu;uw.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/57bafb2c2dfeefba931bb03a835b1fa9-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Exploiting the Structure: Stochastic Gradient Methods Using Raw Clusters", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7238", "id": "7238", "author_site": "Zeyuan Allen-Zhu, Yang Yuan, Karthik Sridharan", "author": "Zeyuan Allen-Zhu; Yang Yuan; Karthik Sridharan", "abstract": "The amount of data available in the world is growing faster than our ability to deal with it. However, if we take advantage of the internal structure, data may become much smaller for machine learning purposes. In this paper we focus on one of the fundamental machine learning tasks, empirical risk minimization (ERM), and provide faster algorithms with help from the clustering structure of the data. We introduce a simple notion of raw clustering that can be efficiently computed from the data, and propose two algorithms based on clustering information. Our accelerated algorithm ClusterACDM is built on a novel Haar transformation applied to the dual space of the ERM problem, and our variance-reduction based algorithm ClusterSVRG introduces a new gradient estimator using clustering. Our algorithms outperform their classical counterparts, ACDM and SVRG, respectively.", "bibtex": "@inproceedings{NIPS2016_4b025079,\n author = {Allen-Zhu, Zeyuan and Yuan, Yang and Sridharan, Karthik},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exploiting the Structure: Stochastic Gradient Methods Using Raw Clusters},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4b0250793549726d5c1ea3906726ebfe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4b0250793549726d5c1ea3906726ebfe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4b0250793549726d5c1ea3906726ebfe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4b0250793549726d5c1ea3906726ebfe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4b0250793549726d5c1ea3906726ebfe-Reviews.html", "metareview": "", "pdf_size": 467195, "gs_citation": 33, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6961416149770069017&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Princeton University / IAS; Cornell University; Cornell University", "aff_domain": "csail.mit.edu;cs.cornell.edu;cs.cornell.edu", "email": "csail.mit.edu;cs.cornell.edu;cs.cornell.edu", "github": "", "project": "https://arxiv.org/abs/1602.02151", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4b0250793549726d5c1ea3906726ebfe-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Princeton University;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.cornell.edu", "aff_unique_abbr": "Princeton;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Exponential Family Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6899", "id": "6899", "author_site": "Maja Rudolph, Francisco Ruiz, Stephan Mandt, David Blei", "author": "Maja Rudolph; Francisco Ruiz; Stephan Mandt; David Blei", "abstract": "Word embeddings are a powerful approach to capturing semantic similarity among terms in a vocabulary. In this paper, we develop exponential family embeddings, which extend the idea of word embeddings to other types of high-dimensional data. As examples, we studied several types of data: neural data with real-valued observations, count data from a market basket analysis, and ratings data from a movie recommendation system. The main idea is that each observation is modeled conditioned on a set of latent embeddings and other observations, called the context, where the way the context is defined depends on the problem. In language the context is the surrounding words; in neuroscience the context is close-by neurons; in market basket data the context is other items in the shopping cart. Each instance of an embedding defines the context, the exponential family of conditional distributions, and how the embedding vectors are shared across data. We infer the embeddings with stochastic gradient descent, with an algorithm that connects closely to generalized linear models. On all three of our applications\u2014neural activity of zebrafish, users\u2019 shopping behavior, and movie ratings\u2014we found that exponential family embedding models are more effective than other dimension reduction methods. 
They better reconstruct held-out data and find interesting qualitative structure.", "bibtex": "@inproceedings{NIPS2016_06138bc5,\n author = {Rudolph, Maja and Ruiz, Francisco and Mandt, Stephan and Blei, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exponential Family Embeddings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/06138bc5af6023646ede0e1f7c1eac75-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/06138bc5af6023646ede0e1f7c1eac75-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/06138bc5af6023646ede0e1f7c1eac75-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/06138bc5af6023646ede0e1f7c1eac75-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/06138bc5af6023646ede0e1f7c1eac75-Reviews.html", "metareview": "", "pdf_size": 1005825, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16573625594067055136&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/06138bc5af6023646ede0e1f7c1eac75-Abstract.html" }, { "title": "Exponential expressivity in deep neural networks through transient chaos", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7128", "id": "7128", "author_site": "Ben Poole, Subhaneil Lahiri, Maithra Raghu, Jascha Sohl-Dickstein, Surya Ganguli", "author": "Ben Poole; Subhaneil Lahiri; Maithra Raghu; Jascha Sohl-Dickstein; Surya Ganguli", "abstract": "We combine Riemannian geometry with the mean field theory of high dimensional chaos to study the nature of signal propagation in deep neural networks with random weights. Our results reveal a phase transition in the expressivity of random deep networks, with networks in the chaotic phase computing nonlinear functions whose global curvature grows exponentially with depth, but not with width. We prove that this generic class of random functions cannot be efficiently computed by any shallow network, going beyond prior work that restricts their analysis to single functions. Moreover, we formally quantify and demonstrate the long conjectured idea that deep networks can disentangle exponentially curved manifolds in input space into flat manifolds in hidden space. Our theoretical framework for analyzing the expressive power of deep networks is broadly applicable and provides a basis for quantifying previously abstract notions about the geometry of deep functions.", "bibtex": "@inproceedings{NIPS2016_14851003,\n author = {Poole, Ben and Lahiri, Subhaneil and Raghu, Maithra and Sohl-Dickstein, Jascha and Ganguli, Surya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Exponential expressivity in deep neural networks through transient chaos},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/148510031349642de5ca0c544f31b2ef-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/148510031349642de5ca0c544f31b2ef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/148510031349642de5ca0c544f31b2ef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/148510031349642de5ca0c544f31b2ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/148510031349642de5ca0c544f31b2ef-Reviews.html", "metareview": "", "pdf_size": 1678872, "gs_citation": 717, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10408494153995210425&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Stanford University; Stanford University; Google Brain; Google Brain; Stanford University", "aff_domain": "stanford.edu;stanford.edu;google.com;google.com;stanford.edu", "email": "stanford.edu;stanford.edu;google.com;google.com;stanford.edu", "github": "https://github.com/ganguli-lab/deepchaos", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/148510031349642de5ca0c544f31b2ef-Abstract.html", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.stanford.edu;https://brain.google.com", "aff_unique_abbr": "Stanford;Google Brain", "aff_campus_unique_index": "0;0;1;1;0", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "FPNN: Field Probing Neural Networks for 3D Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6941", "id": "6941", "author_site": "Yangyan Li, Soeren Pirk, Hao Su, Charles R Qi, Leonidas Guibas", "author": "Yangyan Li; Soeren Pirk; Hao Su; Charles R Qi; Leonidas Guibas", "abstract": "Building discriminative representations for 3D data has been an important task in computer graphics and computer vision research. Convolutional Neural Networks (CNNs) have been shown to operate on 2D images with great success for a variety of tasks. Lifting convolution operators to 3D (3DCNNs) seems like a plausible and promising next step. Unfortunately, the computational complexity of 3D CNNs grows cubically with respect to voxel resolution. Moreover, since most 3D geometry representations are boundary-based, occupied regions do not increase proportionately with the size of the discretization, resulting in wasted computation. In this work, we represent 3D spaces as volumetric fields, and propose a novel design that employs field probing filters to efficiently extract features from them. Each field probing filter is a set of probing points -- sensors that perceive the space. Our learning algorithm optimizes not only the weights associated with the probing points, but also their locations, which deforms the shape of the probing filters and adaptively distributes them in 3D space. The optimized probing points sense the 3D space \"intelligently\", rather than operating blindly over the entire domain. 
We show that field probing is significantly more efficient than 3DCNNs, while providing state-of-the-art performance, on classification tasks for 3D object recognition benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_854d6fae,\n author = {Li, Yangyan and Pirk, Soeren and Su, Hao and Qi, Charles R and Guibas, Leonidas J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {FPNN: Field Probing Neural Networks for 3D Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/854d6fae5ee42911677c739ee1734486-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/854d6fae5ee42911677c739ee1734486-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/854d6fae5ee42911677c739ee1734486-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/854d6fae5ee42911677c739ee1734486-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/854d6fae5ee42911677c739ee1734486-Reviews.html", "metareview": "", "pdf_size": 15099377, "gs_citation": 377, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=859936466690405942&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Stanford University, USA+Shandong University, China; Stanford University, USA; Stanford University, USA; Stanford University, USA; Stanford University, USA", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/854d6fae5ee42911677c739ee1734486-Abstract.html", "aff_unique_index": "0+1;0;0;0;0", "aff_unique_norm": "Stanford University;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;http://www.sdu.edu.cn", "aff_unique_abbr": "Stanford;SDU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0+1;0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Fairness in Learning: Classic and Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8516", "id": "8516", "author_site": "Matthew Joseph, Michael Kearns, Jamie Morgenstern, Aaron Roth", "author": "Matthew Joseph; Michael Kearns; Jamie H Morgenstern; Aaron Roth", "abstract": "We introduce the study of fairness in multi-armed bandit problems. Our fairness definition demands that, given a pool of applicants, a worse applicant is never favored over a better one, despite a learning algorithm\u2019s uncertainty over the true payoffs. In the classic stochastic bandits problem we provide a provably fair algorithm based on \u201cchained\u201d confidence intervals, and prove a cumulative regret bound with a cubic dependence on the number of arms. We further show that any fair algorithm must have such a dependence, providing a strong separation between fair and unfair learning that extends to the general contextual case. In the general contextual case, we prove a tight connection between fairness and the KWIK (Knows What It Knows) learning model: a KWIK algorithm for a class of functions can be transformed into a provably fair contextual bandit algorithm and vice versa. 
This tight connection allows us to provide a provably fair algorithm for the linear contextual bandit problem with a polynomial dependence on the dimension, and to show (for a different class of functions) a worst-case exponential gap in regret between fair and non-fair learning algorithms.", "bibtex": "@inproceedings{NIPS2016_eb163727,\n author = {Joseph, Matthew and Kearns, Michael and Morgenstern, Jamie H and Roth, Aaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fairness in Learning: Classic and Contextual Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/eb163727917cbba1eea208541a643e74-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/eb163727917cbba1eea208541a643e74-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/eb163727917cbba1eea208541a643e74-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/eb163727917cbba1eea208541a643e74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/eb163727917cbba1eea208541a643e74-Reviews.html", "metareview": "", "pdf_size": 331177, "gs_citation": 588, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1978958142859066975&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Pennsylvania, Department of Computer and Information Science; University of Pennsylvania, Department of Computer and Information Science; University of Pennsylvania, Department of Computer and Information Science; University of Pennsylvania, Department of Computer and Information Science", "aff_domain": "cis.upenn.edu;cis.upenn.edu;cis.upenn.edu;cis.upenn.edu", "email": "cis.upenn.edu;cis.upenn.edu;cis.upenn.edu;cis.upenn.edu", "github": "", "project": "https://arxiv.org/abs/17", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/eb163727917cbba1eea208541a643e74-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "Department of Computer and Information Science", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Active Set Methods for Online Spike Inference from Calcium Imaging", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7074", "id": "7074", "author_site": "Johannes Friedrich, Liam Paninski", "author": "Johannes Friedrich; Liam Paninski", "abstract": "Fluorescent calcium indicators are a popular means for observing the spiking activity of large neuronal populations. Unfortunately, extracting the spike train of each neuron from raw fluorescence calcium imaging data is a nontrivial problem. We present a fast online active set method to solve this sparse nonnegative deconvolution problem. Importantly, the algorithm progresses through each time series sequentially from beginning to end, thus enabling real-time online spike inference during the imaging session. Our algorithm is a generalization of the pool adjacent violators algorithm (PAVA) for isotonic regression and inherits its linear-time computational complexity. 
We gain remarkable increases in processing speed: more than one order of magnitude compared to currently employed state-of-the-art convex solvers relying on interior point methods. Our method can exploit warm starts; therefore optimizing model hyperparameters only requires a handful of passes through the data. The algorithm enables real-time simultaneous deconvolution of $O(10^5)$ traces of whole-brain zebrafish imaging data on a laptop.", "bibtex": "@inproceedings{NIPS2016_fc2c7c47,\n author = {Friedrich, Johannes and Paninski, Liam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Active Set Methods for Online Spike Inference from Calcium Imaging},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fc2c7c47b918d0c2d792a719dfb602ef-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fc2c7c47b918d0c2d792a719dfb602ef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fc2c7c47b918d0c2d792a719dfb602ef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fc2c7c47b918d0c2d792a719dfb602ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fc2c7c47b918d0c2d792a719dfb602ef-Reviews.html", "metareview": "", "pdf_size": 720324, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8218903791592566448&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Grossman Center and Department of Statistics, Columbia University, New York, NY + Janelia Research Campus, Ashburn, VA; Grossman Center and Department of Statistics, Columbia University, New York, NY", "aff_domain": "columbia.edu;stat.columbia.edu", "email": "columbia.edu;stat.columbia.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fc2c7c47b918d0c2d792a719dfb602ef-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "Columbia University;Janelia Research Campus", "aff_unique_dep": "Department of Statistics;", "aff_unique_url": "https://www.columbia.edu;https://www.janelia.org", "aff_unique_abbr": "Columbia;", "aff_campus_unique_index": "0+1;0", "aff_campus_unique": "New York;Ashburn", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "Fast Algorithms for Robust PCA via Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7061", "id": "7061", "author_site": "Xinyang Yi, Dohyung Park, Yudong Chen, Constantine Caramanis", "author": "Xinyang Yi; Dohyung Park; Yudong Chen; Constantine Caramanis", "abstract": "We consider the problem of Robust PCA in the fully and partially observed settings. Without corruptions, this is the well-known matrix completion problem. From a statistical standpoint, this problem has recently been well studied, and conditions on when recovery is possible (how many observations do we need, how many corruptions can we tolerate) via polynomial-time algorithms are by now understood. This paper presents and analyzes a non-convex optimization approach that greatly reduces the computational complexity of the above problems, compared to the best available algorithms. 
In particular, in the fully observed case, with $r$ denoting rank and $d$ dimension, we reduce the complexity from $O(r^2d^2\\log(1/\\epsilon))$ to $O(rd^2\\log(1/\\epsilon))$ -- a substantial savings when the rank is large. For the partially observed case, we show the complexity of our algorithm is no more than $O(r^4d\\log(d)\\log(1/\\epsilon))$. Not only is this the best-known run-time for a provable algorithm under partial observation, but in the setting where $r$ is small compared to $d$, it also allows for near-linear-in-$d$ run-time that can be exploited in the fully-observed case as well, by simply running our algorithm on a subset of the observations.", "bibtex": "@inproceedings{NIPS2016_b5f1e8fb,\n author = {Yi, Xinyang and Park, Dohyung and Chen, Yudong and Caramanis, Constantine},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Algorithms for Robust PCA via Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b5f1e8fb36cd7fbeb7988e8639ac79e9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b5f1e8fb36cd7fbeb7988e8639ac79e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b5f1e8fb36cd7fbeb7988e8639ac79e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b5f1e8fb36cd7fbeb7988e8639ac79e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b5f1e8fb36cd7fbeb7988e8639ac79e9-Reviews.html", "metareview": "", "pdf_size": 1128195, "gs_citation": 329, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15642566578800266635&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The University of Texas at Austin; The University of Texas at Austin; Cornell University; The University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;cornell.edu;utexas.edu", "email": "utexas.edu;utexas.edu;cornell.edu;utexas.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b5f1e8fb36cd7fbeb7988e8639ac79e9-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Texas at Austin;Cornell University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.cornell.edu", "aff_unique_abbr": "UT Austin;Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Fast Distributed Submodular Cover: Public-Private Data Summarization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7174", "id": "7174", "author_site": "Baharan Mirzasoleiman, Morteza Zadimoghaddam, Amin Karbasi", "author": "Baharan Mirzasoleiman; Morteza Zadimoghaddam; Amin Karbasi", "abstract": "In this paper, we introduce the public-private framework of data summarization motivated by privacy concerns in personalized recommender systems and online social services. Such systems usually have access to massive data generated by a large pool of users. A major fraction of the data is public and is visible to (and can be used for) all users. However, each user can also contribute some private data that should not be shared with other users to ensure her privacy. 
The goal is to provide a succinct summary of a massive dataset, ideally as small as possible, from which customized summaries can be built for each user, i.e., one that contains elements from the public data (for diversity) and users' private data (for personalization). To formalize the above challenge, we assume that the scoring function according to which a user evaluates the utility of her summary satisfies submodularity, a widely used notion in data summarization applications. Thus, we model the data summarization targeted to each user as an instance of a submodular cover problem. However, when the data is massive, it is infeasible to use the centralized greedy algorithm to find a customized summary even for a single user. Moreover, for a large pool of users, it is too time-consuming to find such summaries separately. Instead, we develop a fast distributed algorithm for submodular cover, FASTCOVER, that provides a succinct summary in one shot and for all users. We show that the solution provided by FASTCOVER is competitive with that of the centralized algorithm, while using a number of rounds that is exponentially smaller than in state-of-the-art results. Moreover, we have implemented FASTCOVER with Spark to demonstrate its practical performance on a number of concrete applications, including personalized location recommendation, personalized movie recommendation, and dominating set on tens of millions of data points and a varying number of users.", "bibtex": "@inproceedings{NIPS2016_05233523,\n author = {Mirzasoleiman, Baharan and Zadimoghaddam, Morteza and Karbasi, Amin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Distributed Submodular Cover: Public-Private Data Summarization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/052335232b11864986bb2fa20fa38748-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/052335232b11864986bb2fa20fa38748-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/052335232b11864986bb2fa20fa38748-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/052335232b11864986bb2fa20fa38748-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/052335232b11864986bb2fa20fa38748-Reviews.html", "metareview": "", "pdf_size": 568145, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8347112238583344520&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/052335232b11864986bb2fa20fa38748-Abstract.html" }, { "title": "Fast Mixing Markov Chains for Strongly Rayleigh Measures, DPPs, and Constrained Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7228", "id": "7228", "author_site": "Chengtao Li, Suvrit Sra, Stefanie Jegelka", "author": "Chengtao Li; Suvrit Sra; Stefanie Jegelka", "abstract": "We study probability measures induced by set functions with constraints. Such measures arise in a variety of real-world settings, where prior knowledge, resource limitations, or other pragmatic considerations impose constraints. We consider the task of rapidly sampling from such constrained measures, and develop fast Markov chain samplers for them. 
Our first main result is for MCMC sampling from Strongly Rayleigh (SR) measures, for which we present sharp polynomial bounds on the mixing time. As a corollary, this result yields a fast mixing sampler for Determinantal Point Processes (DPPs), yielding (to our knowledge) the first provably fast MCMC sampler for DPPs since their inception over four decades ago. Beyond SR measures, we develop MCMC samplers for probabilistic models with hard constraints and identify sufficient conditions under which their chains mix rapidly. We illustrate our claims by empirically verifying the dependence of mixing times on the key factors governing our theoretical bounds.", "bibtex": "@inproceedings{NIPS2016_850af92f,\n author = {Li, Chengtao and Sra, Suvrit and Jegelka, Stefanie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast Mixing Markov Chains for Strongly Rayleigh Measures, DPPs, and Constrained Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/850af92f8d9903e7a4e0559a98ecc857-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/850af92f8d9903e7a4e0559a98ecc857-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/850af92f8d9903e7a4e0559a98ecc857-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/850af92f8d9903e7a4e0559a98ecc857-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/850af92f8d9903e7a4e0559a98ecc857-Reviews.html", "metareview": "", "pdf_size": 624202, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16359691599168427186&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "MIT; MIT; MIT", "aff_domain": "mit.edu;csail.mit.edu;mit.edu", "email": "mit.edu;csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/850af92f8d9903e7a4e0559a98ecc857-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast and Flexible Monotonic Functions with Ensembles of Lattices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7302", "id": "7302", "author_site": "Mahdi Milani Fard, Kevin Canini, Andrew Cotter, Jan Pfeifer, Maya Gupta", "author": "Mahdi Milani Fard; Kevin Canini; Andrew Cotter; Jan Pfeifer; Maya Gupta", "abstract": "For many machine learning problems, there are some inputs that are known to be positively (or negatively) related to the output, and in such cases training the model to respect that monotonic relationship can provide regularization, and makes the model more interpretable. However, flexible monotonic functions are computationally challenging to learn beyond a few features. We break through this barrier by learning ensembles of monotonic calibrated interpolated look-up tables (lattices). A key contribution is an automated algorithm for selecting feature subsets for the ensemble base models. 
We demonstrate that compared to random forests, these ensembles produce similar or better accuracy, while providing guaranteed monotonicity consistent with prior knowledge, smaller model size and faster evaluation.", "bibtex": "@inproceedings{NIPS2016_c913303f,\n author = {Milani Fard, Mahdi and Canini, Kevin and Cotter, Andrew and Pfeifer, Jan and Gupta, Maya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast and Flexible Monotonic Functions with Ensembles of Lattices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c913303f392ffc643f7240b180602652-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c913303f392ffc643f7240b180602652-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c913303f392ffc643f7240b180602652-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c913303f392ffc643f7240b180602652-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c913303f392ffc643f7240b180602652-Reviews.html", "metareview": "", "pdf_size": 375277, "gs_citation": 95, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6432286322533431748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c913303f392ffc643f7240b180602652-Abstract.html" }, { "title": "Fast and Provably Good Seedings for k-Means", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7208", "id": "7208", "author_site": "Olivier Bachem, Mario Lucic, Hamed Hassani, Andreas Krause", "author": "Olivier Bachem; Mario Lucic; Hamed Hassani; Andreas Krause", "abstract": "Seeding - the task of finding initial cluster centers - is critical in obtaining high-quality clusterings for k-Means. However, k-means++ seeding, the state of the art algorithm, does not scale well to massive datasets as it is inherently sequential and requires k full passes through the data. It was recently shown that Markov chain Monte Carlo sampling can be used to efficiently approximate the seeding step of k-means++. However, this result requires assumptions on the data generating distribution. We propose a simple yet fast seeding algorithm that produces", "bibtex": "@inproceedings{NIPS2016_d67d8ab4,\n author = {Bachem, Olivier and Lucic, Mario and Hassani, Hamed and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast and Provably Good Seedings for k-Means},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d67d8ab4f4c10bf22aa353e27879133c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d67d8ab4f4c10bf22aa353e27879133c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d67d8ab4f4c10bf22aa353e27879133c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d67d8ab4f4c10bf22aa353e27879133c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d67d8ab4f4c10bf22aa353e27879133c-Reviews.html", "metareview": "", "pdf_size": 1314311, "gs_citation": 216, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8340975356911213797&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science, ETH Zurich; Department of Computer Science, ETH Zurich; Department of Computer Science, ETH Zurich; Department of Computer Science, ETH Zurich", "aff_domain": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch;ethz.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d67d8ab4f4c10bf22aa353e27879133c-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Fast and accurate spike sorting of high-channel count probes with KiloSort", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7161", "id": "7161", "author_site": "Marius Pachitariu, Nicholas A Steinmetz, Shabnam N Kadir, Matteo Carandini, Kenneth D Harris", "author": "Marius Pachitariu; Nicholas A Steinmetz; Shabnam N Kadir; Matteo Carandini; Kenneth D Harris", "abstract": "New silicon technology is enabling large-scale electrophysiological recordings in vivo from hundreds to thousands of channels. Interpreting these recordings requires scalable and accurate automated methods for spike sorting, which should minimize the time required for manual curation of the results. Here we introduce KiloSort, a new integrated spike sorting framework that uses template matching both during spike detection and during spike clustering. KiloSort models the electrical voltage as a sum of template waveforms triggered on the spike times, which allows overlapping spikes to be identified and resolved. Unlike previous algorithms that compress the data with PCA, KiloSort operates on the raw data which allows it to construct a more accurate model of the waveforms. Processing times are faster than in previous algorithms thanks to batch-based optimization on GPUs. We compare KiloSort to an established algorithm and show favorable performance, at much reduced processing times. 
A novel post-clustering merging step based on the continuity of the templates further substantially reduced the number of manual operations required on this data for the neurons with near-zero error rates, paving the way for fully automated spike sorting of multichannel electrode recordings.", "bibtex": "@inproceedings{NIPS2016_1145a30f,\n author = {Pachitariu, Marius and Steinmetz, Nicholas A and Kadir, Shabnam N and Carandini, Matteo and Harris, Kenneth D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast and accurate spike sorting of high-channel count probes with KiloSort},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1145a30ff80745b56fb0cecf65305017-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1145a30ff80745b56fb0cecf65305017-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1145a30ff80745b56fb0cecf65305017-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1145a30ff80745b56fb0cecf65305017-Reviews.html", "metareview": "", "pdf_size": 774009, "gs_citation": 519, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11485697269047189896&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 19, "aff": "UCL, UK; UCL, UK; UCL, UK; UCL, UK; UCL, UK", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1145a30ff80745b56fb0cecf65305017-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Fast learning rates with heavy-tailed losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7383", "id": "7383", "author_site": "Vu C Dinh, Lam S Ho, Binh Nguyen, Duy Nguyen", "author": "Vu C Dinh; Lam S Ho; Binh Nguyen; Duy Nguyen", "abstract": "We study fast learning rates when the losses are not necessarily bounded and may have a distribution with heavy tails. To enable such analyses, we introduce two new conditions: (i) the envelope function $\\sup_{f \\in \\mathcal{F}}|\\ell \\circ f|$, where $\\ell$ is the loss function and $\\mathcal{F}$ is the hypothesis class, exists and is $L^r$-integrable, and (ii) $\\ell$ satisfies the multi-scale Bernstein's condition on $\\mathcal{F}$. Under these assumptions, we prove that learning rates faster than $O(n^{-1/2})$ can be obtained and, depending on $r$ and the multi-scale Bernstein's powers, can be arbitrarily close to $O(n^{-1})$. We then verify these assumptions and derive fast learning rates for the problem of vector quantization by $k$-means clustering with heavy-tailed distributions. The analyses enable us to obtain novel learning rates that extend and complement existing results in the literature from both theoretical and practical viewpoints.", "bibtex": "@inproceedings{NIPS2016_63923f49,\n author = {Dinh, Vu C and Ho, Lam S and Nguyen, Binh and Nguyen, Duy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast learning rates with heavy-tailed losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/63923f49e5241343aa7acb6a06a751e7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/63923f49e5241343aa7acb6a06a751e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/63923f49e5241343aa7acb6a06a751e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/63923f49e5241343aa7acb6a06a751e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/63923f49e5241343aa7acb6a06a751e7-Reviews.html", "metareview": "", "pdf_size": 292508, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4292618369049305087&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/63923f49e5241343aa7acb6a06a751e7-Abstract.html" }, { "title": "Fast recovery from a union of subspaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7352", "id": "7352", "author_site": "Chinmay Hegde, Piotr Indyk, Ludwig Schmidt", "author": "Chinmay Hegde; Piotr Indyk; Ludwig Schmidt", "abstract": "We address the problem of recovering a high-dimensional but structured vector from linear observations in a general setting where the vector can come from an arbitrary union of subspaces. This setup includes well-studied problems such as compressive sensing and low-rank matrix recovery. We show how to design more efficient algorithms for the union-of-subspaces recovery problem by using", "bibtex": "@inproceedings{NIPS2016_8929c70f,\n author = {Hegde, Chinmay and Indyk, Piotr and Schmidt, Ludwig},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast recovery from a union of subspaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8929c70f8d710e412d38da624b21c3c8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8929c70f8d710e412d38da624b21c3c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8929c70f8d710e412d38da624b21c3c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8929c70f8d710e412d38da624b21c3c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8929c70f8d710e412d38da624b21c3c8-Reviews.html", "metareview": "", "pdf_size": 380741, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3573951337447261655&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Iowa State University; MIT; MIT", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8929c70f8d710e412d38da624b21c3c8-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Iowa State University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iastate.edu;https://web.mit.edu", "aff_unique_abbr": "ISU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7328", "id": "7328", "author_site": "George Papamakarios, Iain Murray", "author": "George Papamakarios; Iain Murray", "abstract": "Many statistical models can be simulated forwards but have intractable likelihoods. Approximate Bayesian Computation (ABC) methods are used to infer properties of these models from data. Traditionally these methods approximate the posterior over parameters by conditioning on data being inside an \u03b5-ball around the observed data, which is only correct in the limit \u03b5\u21920. Monte Carlo methods can then draw samples from the approximate posterior to approximate predictions or error bars on parameters. These algorithms critically slow down as \u03b5\u21920, and in practice draw samples from a broader distribution than the posterior. We propose a new approach to likelihood-free inference based on Bayesian conditional density estimation. Preliminary inferences based on limited simulation data are used to guide later simulations. In some cases, learning an accurate parametric representation of the entire true posterior distribution requires fewer model simulations than Monte Carlo ABC methods need to produce a single sample from an approximate posterior.", "bibtex": "@inproceedings{NIPS2016_6aca9700,\n author = {Papamakarios, George and Murray, Iain},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fast \\epsilon -free Inference of Simulation Models with Bayesian Conditional Density Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6aca97005c68f1206823815f66102863-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6aca97005c68f1206823815f66102863-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6aca97005c68f1206823815f66102863-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6aca97005c68f1206823815f66102863-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6aca97005c68f1206823815f66102863-Reviews.html", "metareview": "", "pdf_size": 639421, "gs_citation": 515, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13216051061618218810&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "School of Informatics, University of Edinburgh; School of Informatics, University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk", "email": "ed.ac.uk;ed.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6aca97005c68f1206823815f66102863-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "School of Informatics", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Edinburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Faster Projection-free Convex Optimization over the Spectrahedron", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7078", "id": "7078", "author_site": "Dan Garber, Dan Garber", "author": "Dan Garber; Dan Garber", "abstract": "Minimizing a convex function over the spectrahedron, i.e., the set of all $d\\times d$ positive semidefinite matrices with unit trace, is an important optimization task with many applications in optimization, machine learning, and signal processing. It is also notoriously difficult to solve at large scale, since standard techniques require computing expensive matrix decompositions. An alternative is the conditional gradient (CG) method (aka the Frank-Wolfe algorithm), which has regained much interest in recent years, mostly due to its application to this specific setting. The key benefit of the CG method is that it avoids expensive matrix decompositions altogether, and simply requires a single eigenvector computation per iteration, which is much more efficient. On the downside, the CG method, in general, converges with an inferior rate. The error for minimizing a $\\beta$-smooth function after $t$ iterations scales like $\\beta/t$. This rate does not improve even if the function is also strongly convex. In this work we present a modification of the CG method tailored for the spectrahedron. The per-iteration complexity of the method is essentially identical to that of the standard CG method: only a single eigenvector computation is required. For minimizing an $\\alpha$-strongly convex and $\\beta$-smooth function, the \\textit{expected} error of the method after $t$ iterations is: $O\\left(\\min\\left\\{\\frac{\\beta}{t}, \\left(\\frac{\\beta\\sqrt{\\mathrm{rank}(X^*)}}{\\alpha^{1/4}t}\\right)^{4/3}, \\left(\\frac{\\beta}{\\sqrt{\\alpha}\\,\\lambda_{\\min}(X^*)\\,t}\\right)^{2}\\right\\}\\right)$. 
Beyond the significant improvement in convergence rate, it also follows that when the optimum is low-rank, our method provides a better accuracy-rank tradeoff than the standard CG method. To the best of our knowledge, this is the first result that attains provably faster convergence rates for a CG variant for optimization over the spectrahedron. We also present encouraging preliminary empirical results.", "bibtex": "@inproceedings{NIPS2016_df877f38,\n author = {Garber, Dan and Garber, Dan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Faster Projection-free Convex Optimization over the Spectrahedron},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/df877f3865752637daa540ea9cbc474f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/df877f3865752637daa540ea9cbc474f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/df877f3865752637daa540ea9cbc474f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/df877f3865752637daa540ea9cbc474f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/df877f3865752637daa540ea9cbc474f-Reviews.html", "metareview": "", "pdf_size": 415648, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15022169561973618164&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/df877f3865752637daa540ea9cbc474f-Abstract.html" }, { "title": "Feature selection in functional data classification with recursive maxima hunting", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7087", "id": "7087", "author_site": "Jos\u00e9 L. Torrecilla, Alberto Su\u00e1rez", "author": "Jos\u00e9 L. Torrecilla; Alberto Su\u00e1rez", "abstract": "Dimensionality reduction is one of the key issues in the design of effective machine learning methods for automatic induction. In this work, we introduce recursive maxima hunting (RMH) for variable selection in classification problems with functional data. In this context, variable selection techniques are especially attractive because they reduce the dimensionality, facilitate the interpretation and can improve the accuracy of the predictive models. The method, which is a recursive extension of maxima hunting (MH), performs variable selection by identifying the maxima of a relevance function, which measures the strength of the correlation of the predictor functional variable with the class label. At each stage, the information associated with the selected variable is removed by subtracting the conditional expectation of the process. The results of an extensive empirical evaluation are used to illustrate that, in the problems investigated, RMH has comparable or higher predictive accuracy than standard dimensionality reduction techniques, such as PCA and PLS, and state-of-the-art feature selection methods for functional data, such as maxima hunting.", "bibtex": "@inproceedings{NIPS2016_28b60a16,\n author = {Torrecilla, Jos\\'{e} L. and Su\\'{a}rez, Alberto},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Feature selection in functional data classification with recursive maxima hunting},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/28b60a16b55fd531047c0c958ce14b95-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/28b60a16b55fd531047c0c958ce14b95-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/28b60a16b55fd531047c0c958ce14b95-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/28b60a16b55fd531047c0c958ce14b95-Reviews.html", "metareview": "", "pdf_size": 617568, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3763709583413624948&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Computer Science Department, Universidad Aut\u00f3noma de Madrid; Computer Science Department, Universidad Aut\u00f3noma de Madrid", "aff_domain": "uam.es;uam.es", "email": "uam.es;uam.es", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/28b60a16b55fd531047c0c958ce14b95-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Universidad Aut\u00f3noma de Madrid", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.uam.es", "aff_unique_abbr": "UAM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "title": "Feature-distributed sparse regression: a screen-and-clean approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7346", "id": "7346", "author_site": "Jiyan Yang, Michael Mahoney, Michael Saunders, Yuekai Sun", "author": "Jiyan Yang; Michael W. Mahoney; Michael Saunders; Yuekai Sun", "abstract": "Most existing approaches to distributed sparse regression assume the data is partitioned by samples. However, for high-dimensional data (D >> N), it is more natural to partition the data by features. We propose an algorithm for distributed sparse regression when the data is partitioned by features rather than samples. Our approach allows the user to tailor our general method to various distributed computing platforms by trading off the total amount of data (in bits) sent over the communication network and the number of rounds of communication. We show that an implementation of our approach is capable of solving L1-regularized L2 regression problems with millions of features in minutes.", "bibtex": "@inproceedings{NIPS2016_363763e5,\n author = {Yang, Jiyan and Mahoney, Michael W and Saunders, Michael and Sun, Yuekai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Feature-distributed sparse regression: a screen-and-clean approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/363763e5c3dc3a68b399058c34aecf2c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/363763e5c3dc3a68b399058c34aecf2c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/363763e5c3dc3a68b399058c34aecf2c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/363763e5c3dc3a68b399058c34aecf2c-Reviews.html", "metareview": "", "pdf_size": 435941, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9623896092893352924&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "Stanford University; University of California at Berkeley; Stanford University; University of Michigan", "aff_domain": "stanford.edu;stat.berkeley.edu;stanford.edu;umich.edu", "email": "stanford.edu;stat.berkeley.edu;stanford.edu;umich.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/363763e5c3dc3a68b399058c34aecf2c-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Stanford University;University of California, Berkeley;University of Michigan", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.berkeley.edu;https://www.umich.edu", "aff_unique_abbr": "Stanford;UC Berkeley;UM", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Stanford;Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Finding significant combinations of features in the presence of categorical covariates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6925", "id": "6925", "author_site": "Laetitia Papaxanthos, Felipe Llinares-L\u00f3pez, Dean Bodenham, Karsten Borgwardt", "author": "Laetitia Papaxanthos; Felipe Llinares-L\u00f3pez; Dean Bodenham; Karsten Borgwardt", "abstract": "In high-dimensional settings, where the number of features p is typically much larger than the number of samples n, methods which can systematically examine arbitrary combinations of features, a huge 2^p-dimensional space, have recently begun to be explored. However, none of the current methods is able to assess the association between feature combinations and a target variable while conditioning on a categorical covariate, in order to correct for potential confounding effects. We propose the Fast Automatic Conditional Search (FACS) algorithm, a significant discriminative itemset mining method which conditions on categorical covariates and only scales as O(k log k), where k is the number of states of the categorical covariate. Based on the Cochran-Mantel-Haenszel Test, FACS demonstrates superior speed and statistical power on simulated and real-world datasets compared to the state of the art, opening the door to numerous applications in biomedicine.", "bibtex": "@inproceedings{NIPS2016_0a0a0c8a,\n author = {Papaxanthos, Laetitia and Llinares-L\\'{o}pez, Felipe and Bodenham, Dean and Borgwardt, Karsten},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Finding significant combinations of features in the presence of categorical covariates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0a0a0c8aaa00ade50f74a3f0ca981ed7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0a0a0c8aaa00ade50f74a3f0ca981ed7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0a0a0c8aaa00ade50f74a3f0ca981ed7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0a0a0c8aaa00ade50f74a3f0ca981ed7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0a0a0c8aaa00ade50f74a3f0ca981ed7-Reviews.html", "metareview": "", "pdf_size": 550335, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3967308073262753563&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0a0a0c8aaa00ade50f74a3f0ca981ed7-Abstract.html" }, { "title": "Finite Sample Prediction and Recovery Bounds for Ordinal Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7323", "id": "7323", "author_site": "Lalit Jain, Kevin Jamieson, Rob Nowak", "author": "Lalit Jain; Kevin G. Jamieson; Rob Nowak", "abstract": "The goal of ordinal embedding is to represent items as points in a low-dimensional Euclidean space given a set of constraints like ``item $i$ is closer to item $j$ than item $k$''. Ordinal constraints like this often come from human judgments. The classic approach to solving this problem is known as non-metric multidimensional scaling. To account for errors and variation in judgments, we consider the noisy situation in which the given constraints are independently corrupted by reversing the correct constraint with some probability. The ordinal embedding problem has been studied for decades, but most past work pays little attention to the question of whether accurate embedding is possible, apart from empirical studies. This paper shows that under a generative data model it is possible to learn the correct embedding from noisy distance comparisons. In establishing this fundamental result, the paper makes several new contributions. First, we derive prediction error bounds for embedding from noisy distance comparisons by exploiting the fact that the rank of a distance matrix of points in $\\mathbb{R}^d$ is at most $d+2$. These bounds characterize how well a learned embedding predicts new comparative judgments. Second, we show that the underlying embedding can be recovered by solving a simple convex optimization. This result is highly non-trivial since we show that the linear map corresponding to distance comparisons is non-invertible, but there exists a nonlinear map that is invertible. Third, two new algorithms for ordinal embedding are proposed and evaluated in experiments.", "bibtex": "@inproceedings{NIPS2016_4e0d67e5,\n author = {Jain, Lalit and Jamieson, Kevin G and Nowak, Rob},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Finite Sample Prediction and Recovery Bounds for Ordinal Embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4e0d67e54ad6626e957d15b08ae128a6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4e0d67e54ad6626e957d15b08ae128a6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4e0d67e54ad6626e957d15b08ae128a6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4e0d67e54ad6626e957d15b08ae128a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4e0d67e54ad6626e957d15b08ae128a6-Reviews.html", "metareview": "", "pdf_size": 642422, "gs_citation": 79, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12842512570850673226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Michigan; University of California, Berkeley; University of Wisconsin", "aff_domain": "umich.edu;berkeley.edu;wisc.edu", "email": "umich.edu;berkeley.edu;wisc.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4e0d67e54ad6626e957d15b08ae128a6-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Michigan;University of California, Berkeley;University of Wisconsin", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.berkeley.edu;https://www.wisc.edu", "aff_unique_abbr": "UM;UC Berkeley;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Finite-Dimensional BFRY Priors and Variational Bayesian Inference for Power Law Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6915", "id": "6915", "author_site": "Juho Lee, Lancelot F James, Seungjin Choi", "author": "Juho Lee; Lancelot F James; Seungjin Choi", "abstract": "Bayesian nonparametric methods based on the Dirichlet process (DP), gamma process, and beta process have proven effective in capturing aspects of various datasets arising in machine learning. However, it is now recognized that such processes have their limitations in terms of the ability to capture power law behavior. As such, there is now considerable interest in models based on the Stable Process (SP), Generalized Gamma process (GGP) and Stable-beta process (SBP). These models present new challenges in terms of practical statistical implementation. In analogy to tractable processes such as the finite-dimensional Dirichlet process, we describe a class of random processes, which we call iid finite-dimensional BFRY processes, that enables one to begin to develop efficient posterior inference algorithms such as variational Bayes that readily scale to massive datasets. For illustrative purposes, we describe a simple variational Bayes algorithm for normalized SP mixture models, and demonstrate its usefulness with experiments on synthetic and real-world datasets.", "bibtex": "@inproceedings{NIPS2016_0d4f4805,\n author = {Lee, Juho and James, Lancelot F and Choi, Seungjin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Finite-Dimensional BFRY Priors and Variational Bayesian Inference for Power Law Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0d4f4805c36dc6853edfa4c7e1638b48-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0d4f4805c36dc6853edfa4c7e1638b48-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0d4f4805c36dc6853edfa4c7e1638b48-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0d4f4805c36dc6853edfa4c7e1638b48-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0d4f4805c36dc6853edfa4c7e1638b48-Reviews.html", "metareview": "", "pdf_size": 328640, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=19773849653827489&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "POSTECH, Korea; HKUST, Hong Kong; POSTECH, Korea", "aff_domain": "postech.ac.kr;ust.hk;postech.ac.kr", "email": "postech.ac.kr;ust.hk;postech.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0d4f4805c36dc6853edfa4c7e1638b48-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Pohang University of Science and Technology;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.postech.ac.kr;https://www.ust.hk", "aff_unique_abbr": "POSTECH;HKUST", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Pohang;Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "South Korea;China" }, { "title": "Finite-Sample Analysis of Fixed-k Nearest Neighbor Density Functional Estimators", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7081", "id": "7081", "author_site": "Shashank Singh, Barnabas Poczos", "author": "Shashank Singh; Barnabas Poczos", "abstract": "We provide finite-sample analysis of a general framework for using k-nearest neighbor statistics to estimate functionals of a nonparametric continuous probability density, including entropies and divergences. Rather than plugging a consistent density estimate (which requires k \u2192 \u221e as the sample size n \u2192 \u221e) into the functional of interest, the estimators we consider fix k and perform a bias correction. This can be more efficient computationally, and, as we show, statistically, leading to faster convergence rates. Our framework unifies several previous estimators, for most of which ours are the first finite sample guarantees.", 
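A concrete member of the fixed-k family this abstract refers to is the classic Kozachenko-Leonenko entropy estimator, where digamma terms supply the bias correction instead of letting k grow with n. The sketch below is the textbook construction, not the paper's full framework; constants and sizes are illustrative.

```python
# Hedged sketch: Kozachenko-Leonenko entropy estimator with fixed k and a
# digamma bias correction, a standard instance of fixed-k functional estimation.
import numpy as np
from scipy.spatial import cKDTree
from scipy.special import digamma, gammaln

def kl_entropy(X, k=3):
    n, d = X.shape
    # Distance from each point to its k-th nearest neighbor (index 0 is the point itself).
    eps = cKDTree(X).query(X, k=k + 1)[0][:, k]
    log_cd = (d / 2) * np.log(np.pi) - gammaln(d / 2 + 1)   # log volume of the unit d-ball
    return digamma(n) - digamma(k) + log_cd + d * np.mean(np.log(eps))

# Sanity check: entropy of a standard 2-D Gaussian is log(2*pi*e) ~ 2.838 nats.
X = np.random.default_rng(0).normal(size=(5000, 2))
print(kl_entropy(X))
```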
"bibtex": "@inproceedings{NIPS2016_2dea61ee,\n author = {Singh, Shashank and Poczos, Barnabas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Finite-Sample Analysis of Fixed-k Nearest Neighbor Density Functional Estimators},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2dea61eed4bceec564a00115c4d21334-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2dea61eed4bceec564a00115c4d21334-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2dea61eed4bceec564a00115c4d21334-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2dea61eed4bceec564a00115c4d21334-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2dea61eed4bceec564a00115c4d21334-Reviews.html", "metareview": "", "pdf_size": 391371, "gs_citation": 71, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15792124040390107289&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Statistics & Machine Learning Departments, Carnegie Mellon University; Machine Learning Departments, Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.cmu.edu", "email": "andrew.cmu.edu;cs.cmu.edu", "github": "", "project": "https://bitbucket.org/szzoli/ite/", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2dea61eed4bceec564a00115c4d21334-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Statistics & Machine Learning Departments", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Flexible Models for Microclustering with Application to Entity Resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6983", "id": "6983", "author_site": "Brenda Betancourt, Giacomo Zanella, Jeffrey Miller, Hanna Wallach, Abbas Zaidi, Beka Steorts", "author": "Brenda Betancourt; Giacomo Zanella; Jeffrey W Miller; Hanna Wallach; Abbas Zaidi; Rebecca C. Steorts", "abstract": "Most generative models for clustering implicitly assume that the number of data points in each cluster grows linearly with the total number of data points. Finite mixture models, Dirichlet process mixture models, and Pitman--Yor process mixture models make this assumption, as do all other infinitely exchangeable clustering models. However, for some applications, this assumption is inappropriate. For example, when performing entity resolution, the size of each cluster should be unrelated to the size of the data set, and each cluster should contain a negligible fraction of the total number of data points. These applications require models that yield clusters whose sizes grow sublinearly with the size of the data set. We address this requirement by defining the microclustering property and introducing a new class of models that can exhibit this property. We compare models within this class to two commonly used clustering models using four entity-resolution data sets.", "bibtex": "@inproceedings{NIPS2016_670e8a43,\n author = {Betancourt, Brenda and Zanella, Giacomo and Miller, Jeffrey W and Wallach, Hanna and Zaidi, Abbas and Steorts, Rebecca C.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Flexible Models for Microclustering with Application to Entity Resolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/670e8a43b246801ca1eaca97b3e19189-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/670e8a43b246801ca1eaca97b3e19189-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/670e8a43b246801ca1eaca97b3e19189-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/670e8a43b246801ca1eaca97b3e19189-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/670e8a43b246801ca1eaca97b3e19189-Reviews.html", "metareview": "", "pdf_size": 383008, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2983288435255969319&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Decision Sciences, Bocconi University; Department of Statistical Science, Duke University; Microsoft Research; Department of Biostatistics, Harvard University; Department of Statistical Science, Duke University; Departments of Statistical Science and Computer Science, Duke University", "aff_domain": "unibocconi.it;stat.duke.edu;dirichlet.net;hsph.harvard.edu;stat.duke.edu;stat.duke.edu", "email": "unibocconi.it;stat.duke.edu;dirichlet.net;hsph.harvard.edu;stat.duke.edu;stat.duke.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/670e8a43b246801ca1eaca97b3e19189-Abstract.html", "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "Bocconi University;Duke University;Microsoft;Harvard University", "aff_unique_dep": "Department of Decision Sciences;Department of Statistical Science;Microsoft Research;Department of Biostatistics", "aff_unique_url": "https://www.bocconi.edu;https://www.duke.edu;https://www.microsoft.com/en-us/research;https://www.harvard.edu", "aff_unique_abbr": "Bocconi;Duke;MSR;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Italy;United States" }, { "title": "Following the Leader and Fast Rates in Linear Prediction: Curved Constraint Sets and Other Regularities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7103", "id": "7103", "author_site": "Ruitong Huang, Tor Lattimore, Andr\u00e1s Gy\u00f6rgy, Csaba Szepesvari", "author": "Ruitong Huang; Tor Lattimore; Andr\u00e1s Gy\u00f6rgy; Csaba Szepesvari", "abstract": "The follow the leader (FTL) algorithm, perhaps the simplest of all online learning algorithms, is known to perform well when the loss functions it is used on are positively curved. In this paper we ask whether there are other \"lucky\" settings when FTL achieves sublinear, \"small\" regret. In particular, we study the fundamental problem of linear prediction over a non-empty convex, compact domain. Amongst other results, we prove that the curvature of the boundary of the domain can act as if the losses were curved: In this case, we prove that as long as the means of the loss vectors have positive lengths bounded away from zero, FTL enjoys a logarithmic growth rate of regret, while, e.g., for polyhedral domains and stochastic data it enjoys finite expected regret. 
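This curved-domain phenomenon is easy to see numerically. A minimal sketch, assuming i.i.d. linear losses with a nonzero mean over the unit Euclidean ball, where the leader has the closed form w_t = -L_t/||L_t|| for cumulative loss vector L_t; all constants are illustrative.

```python
# Hedged sketch: follow-the-leader (FTL) for stochastic linear losses over the
# unit Euclidean ball, the curved-boundary setting described in the abstract.
import numpy as np

rng = np.random.default_rng(0)
T, d = 2000, 5
mu = np.array([0.5, 0.2, 0.0, -0.1, 0.3])        # mean loss vector with ||mu|| > 0
losses = mu + 0.5 * rng.normal(size=(T, d))      # i.i.d. linear losses

L = np.zeros(d)
ftl_loss = 0.0
for t in range(T):
    # Leader: argmin over the unit ball of <L, w> is -L / ||L||.
    w = -L / np.linalg.norm(L) if np.linalg.norm(L) > 0 else np.zeros(d)
    ftl_loss += losses[t] @ w
    L += losses[t]

best_fixed = -np.linalg.norm(losses.sum(axis=0))  # best single point in hindsight
print("regret:", ftl_loss - best_fixed)           # small relative to T here
```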
Building on a previously known meta-algorithm, we also get an algorithm that simultaneously enjoys the worst-case guarantees and the bound available for FTL.", "bibtex": "@inproceedings{NIPS2016_55a988df,\n author = {Huang, Ruitong and Lattimore, Tor and Gy\\\"{o}rgy, Andr\\'{a}s and Szepesvari, Csaba},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Following the Leader and Fast Rates in Linear Prediction: Curved Constraint Sets and Other Regularities},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/55a988dfb00a914717b3000a3374694c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/55a988dfb00a914717b3000a3374694c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/55a988dfb00a914717b3000a3374694c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/55a988dfb00a914717b3000a3374694c-Reviews.html", "metareview": "", "pdf_size": 458086, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6160546889716009984&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Department of Computing Science, University of Alberta, AB, Canada; School of Informatics and Computing, Indiana University, IN, USA; Dept. of Electrical & Electronic Engineering, Imperial College London, UK; Department of Computing Science, University of Alberta, AB, Canada", "aff_domain": "ualberta.ca;gmail.com;imperial.ac.uk;ualberta.ca", "email": "ualberta.ca;gmail.com;imperial.ac.uk;ualberta.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/55a988dfb00a914717b3000a3374694c-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Alberta;Indiana University;Imperial College London", "aff_unique_dep": "Department of Computing Science;School of Informatics and Computing;Dept. of Electrical & Electronic Engineering", "aff_unique_url": "https://www.ualberta.ca;https://www.indiana.edu;https://www.imperial.ac.uk", "aff_unique_abbr": "UAlberta;IU;ICL", "aff_campus_unique_index": "0;1;2;0", "aff_campus_unique": "Edmonton;Bloomington;London", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "Full-Capacity Unitary Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7168", "id": "7168", "author_site": "Scott Wisdom, Thomas Powers, John Hershey, Jonathan Le Roux, Les Atlas", "author": "Scott Wisdom; Thomas Powers; John Hershey; Jonathan Le Roux; Les Atlas", "abstract": "Recurrent neural networks are powerful models for processing sequential data, but they are generally plagued by vanishing and exploding gradient problems. Unitary recurrent neural networks (uRNNs), which use unitary recurrence matrices, have recently been proposed as a means to avoid these issues. However, in previous experiments, the recurrence matrices were restricted to be a product of parameterized unitary matrices, and an open question remains: when does such a parameterization fail to represent all unitary matrices, and how does this restricted representational capacity limit what can be learned? 
To address this question, we propose full-capacity uRNNs that optimize their recurrence matrix over all unitary matrices, leading to significantly improved performance over uRNNs that use a restricted-capacity recurrence matrix. Our contribution consists of two main components. First, we provide a theoretical argument to determine if a unitary parameterization has restricted capacity. Using this argument, we show that a recently proposed unitary parameterization has restricted capacity for hidden state dimension greater than 7. Second, we show how a complete, full-capacity unitary recurrence matrix can be optimized over the differentiable manifold of unitary matrices. The resulting multiplicative gradient step is very simple and does not require gradient clipping or learning rate adaptation. We confirm the utility of our claims by empirically evaluating our new full-capacity uRNNs on both synthetic and natural data, achieving superior performance compared to both LSTMs and the original restricted-capacity uRNNs.", "bibtex": "@inproceedings{NIPS2016_d9ff90f4,\n author = {Wisdom, Scott and Powers, Thomas and Hershey, John and Le Roux, Jonathan and Atlas, Les},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Full-Capacity Unitary Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d9ff90f4000eacd3a6c9cb27f78994cf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d9ff90f4000eacd3a6c9cb27f78994cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d9ff90f4000eacd3a6c9cb27f78994cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d9ff90f4000eacd3a6c9cb27f78994cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d9ff90f4000eacd3a6c9cb27f78994cf-Reviews.html", "metareview": "", "pdf_size": 976262, "gs_citation": 384, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10492904561011221584&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Department of Electrical Engineering, University of Washington; Department of Electrical Engineering, University of Washington; Mitsubishi Electric Research Laboratories (MERL); Mitsubishi Electric Research Laboratories (MERL); Department of Electrical Engineering, University of Washington", "aff_domain": "uw.edu;uw.edu;merl.com;merl.com;uw.edu", "email": "uw.edu;uw.edu;merl.com;merl.com;uw.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d9ff90f4000eacd3a6c9cb27f78994cf-Abstract.html", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Washington;Mitsubishi Electric Research Laboratories", "aff_unique_dep": "Department of Electrical Engineering;", "aff_unique_url": "https://www.washington.edu;https://www.merl.com", "aff_unique_abbr": "UW;MERL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, 
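The multiplicative step mentioned above can be sketched in a few lines: build a skew-Hermitian matrix A from the loss gradient and apply a Cayley-style update, which keeps the recurrence matrix exactly unitary. The random gradient below is only a stand-in for backpropagation; everything outside the update itself is illustrative.

```python
# Hedged sketch: one multiplicative gradient step on the manifold of unitary
# matrices. A = G W^H - W G^H is skew-Hermitian, so the Cayley-style update
# (I + (lr/2) A)^{-1} (I - (lr/2) A) W is exactly unitary.
import numpy as np

rng = np.random.default_rng(0)
n = 8
W = np.linalg.qr(rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n)))[0]  # unitary init
G = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))  # stand-in loss gradient dL/dW

lr = 0.01
A = G @ W.conj().T - W @ G.conj().T          # skew-Hermitian by construction
I = np.eye(n)
W = np.linalg.solve(I + (lr / 2) * A, (I - (lr / 2) * A) @ W)

print(np.allclose(W.conj().T @ W, I))        # True: unitarity preserved exactly
```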
{ "title": "Fundamental Limits of Budget-Fidelity Trade-off in Label Crowdsourcing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7063", "id": "7063", "author_site": "Farshad Lahouti, Babak Hassibi", "author": "Farshad Lahouti; Babak Hassibi", "abstract": "Digital crowdsourcing (CS) is a modern approach to perform certain large projects using small contributions of a large crowd. In CS, a taskmaster typically breaks down the project into small batches of tasks and assigns them to so-called workers with imperfect skill levels. The crowdsourcer then collects and analyzes the results for inference and serving the purpose of the project. In this work, the CS problem, as a human-in-the-loop computation problem, is modeled and analyzed in an information-theoretic rate-distortion framework. The purpose is to identify the ultimate fidelity that one can achieve by any form of query from the crowd and any decoding (inference) algorithm with a given budget. The results are established by a joint source-channel (de)coding scheme, which represents the query scheme and inference, over parallel noisy channels, which model workers with imperfect skill levels. We also present and analyze a query scheme dubbed k-ary incidence coding and study optimized query pricing in this setting.", "bibtex": "@inproceedings{NIPS2016_339a18de,\n author = {Lahouti, Farshad and Hassibi, Babak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Fundamental Limits of Budget-Fidelity Trade-off in Label Crowdsourcing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/339a18def9898dd60a634b2ad8fbbd58-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/339a18def9898dd60a634b2ad8fbbd58-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/339a18def9898dd60a634b2ad8fbbd58-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/339a18def9898dd60a634b2ad8fbbd58-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/339a18def9898dd60a634b2ad8fbbd58-Reviews.html", "metareview": "", "pdf_size": 178321, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7574173443082228473&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Electrical Engineering Department, California Institute of Technology; Electrical Engineering Department, California Institute of Technology", "aff_domain": "caltech.edu;caltech.edu", "email": "caltech.edu;caltech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/339a18def9898dd60a634b2ad8fbbd58-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "California Institute of Technology", "aff_unique_dep": "Electrical Engineering Department", "aff_unique_url": "https://www.caltech.edu", "aff_unique_abbr": "Caltech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "GAP Safe Screening Rules for Sparse-Group Lasso", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7112", "id": "7112", "author_site": "Eugene Ndiaye, Olivier Fercoq, Alexandre Gramfort, Joseph Salmon", "author": "Eugene Ndiaye; Olivier Fercoq; Alexandre Gramfort; Joseph Salmon", "abstract": "For statistical learning in high dimension, sparse regularizations have proven useful to boost both computational and statistical efficiency. In some contexts, it is natural to handle more refined structures than pure sparsity, such as, for instance, group sparsity. 
Sparse-Group Lasso has recently been introduced in the context of linear regression to enforce sparsity both at the feature and at the group level. We propose the first (provably) safe screening rules for Sparse-Group Lasso, i.e., rules that allow one to discard, early in the solver, features/groups that are inactive at the optimal solution. Thanks to efficient dual gap computations relying on the geometric properties of the $\\epsilon$-norm, safe screening rules for Sparse-Group Lasso lead to significant gains in terms of computing time for our coordinate descent implementation.", "bibtex": "@inproceedings{NIPS2016_555d6702,\n author = {Ndiaye, Eugene and Fercoq, Olivier and Gramfort, Alexandre and Salmon, Joseph},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {GAP Safe Screening Rules for Sparse-Group Lasso},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/555d6702c950ecb729a966504af0a635-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/555d6702c950ecb729a966504af0a635-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/555d6702c950ecb729a966504af0a635-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/555d6702c950ecb729a966504af0a635-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/555d6702c950ecb729a966504af0a635-Reviews.html", "metareview": "", "pdf_size": 835991, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15662759148488604556&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "LTCI, CNRS, T\u00e9l\u00e9com ParisTech; LTCI, CNRS, T\u00e9l\u00e9com ParisTech; LTCI, CNRS, T\u00e9l\u00e9com ParisTech; LTCI, CNRS, T\u00e9l\u00e9com ParisTech", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "email": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/555d6702c950ecb729a966504af0a635-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech", "aff_unique_dep": "LTCI", "aff_unique_url": "https://www.telecom-paristech.fr", "aff_unique_abbr": "T\u00e9l\u00e9com ParisTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, 
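To make the screening idea concrete, here is a hedged sketch of a GAP safe rule for the plain Lasso; the paper's rules extend this reasoning to the sparse-group penalty. The dual point, gap radius, and test below follow the standard Lasso duality and are illustrative, not the authors' exact implementation.

```python
# Hedged sketch: GAP safe screening for the Lasso
#   min_beta 0.5 * ||y - X beta||^2 + lam * ||beta||_1.
# From any primal beta, build a feasible dual point, compute the duality gap,
# and safely discard features that provably cannot be active at the optimum.
import numpy as np

def gap_safe_screen(X, y, beta, lam):
    resid = y - X @ beta
    # Rescale the residual so the dual point satisfies ||X^T theta||_inf <= 1.
    theta = resid / max(lam, np.max(np.abs(X.T @ resid)))
    primal = 0.5 * resid @ resid + lam * np.sum(np.abs(beta))
    dual = 0.5 * y @ y - 0.5 * lam**2 * np.sum((theta - y / lam) ** 2)
    gap = max(primal - dual, 0.0)
    r = np.sqrt(2 * gap) / lam                   # radius of the safe sphere
    # Feature j is provably inactive if |x_j^T theta| + r * ||x_j|| < 1.
    scores = np.abs(X.T @ theta) + r * np.linalg.norm(X, axis=0)
    return scores < 1.0                          # mask of features safe to discard

rng = np.random.default_rng(0)
X, y = rng.normal(size=(50, 200)), rng.normal(size=50)
lam = 0.8 * np.max(np.abs(X.T @ y))
print(gap_safe_screen(X, y, np.zeros(200), lam).sum(), "features screened out")
```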
{ "title": "Gaussian Process Bandit Optimisation with Multi-fidelity Evaluations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6982", "id": "6982", "author_site": "Kirthevasan Kandasamy, Gautam Dasarathy, Junier B Oliva, Jeff Schneider, Barnabas Poczos", "author": "Kirthevasan Kandasamy; Gautam Dasarathy; Junier B Oliva; Jeff Schneider; Barnabas Poczos", "abstract": "In many scientific and engineering applications, we are tasked with the optimisation of an expensive-to-evaluate black-box function $f$. Traditional methods for this problem assume just the availability of this single function. However, in many cases, cheap approximations to $f$ may be obtainable. For example, the expensive real-world behaviour of a robot can be approximated by a cheap computer simulation. We can use these approximations to eliminate low function value regions cheaply and use the expensive evaluations of $f$ in a small but promising region, speedily identifying the optimum. We formalise this task as a \\emph{multi-fidelity} bandit problem where the target function and its approximations are sampled from a Gaussian process. We develop MF-GP-UCB, a novel method based on upper confidence bound techniques. In our theoretical analysis we demonstrate that it exhibits precisely the above behaviour, and achieves better regret than strategies which ignore multi-fidelity information. MF-GP-UCB outperforms such naive strategies and other multi-fidelity methods on several synthetic and real experiments.", "bibtex": "@inproceedings{NIPS2016_605ff764,\n author = {Kandasamy, Kirthevasan and Dasarathy, Gautam and Oliva, Junier B and Schneider, Jeff and Poczos, Barnabas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gaussian Process Bandit Optimisation with Multi-fidelity Evaluations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/605ff764c617d3cd28dbbdd72be8f9a2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/605ff764c617d3cd28dbbdd72be8f9a2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/605ff764c617d3cd28dbbdd72be8f9a2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/605ff764c617d3cd28dbbdd72be8f9a2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/605ff764c617d3cd28dbbdd72be8f9a2-Reviews.html", "metareview": "", "pdf_size": 809922, "gs_citation": 217, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4640408860441809399&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Carnegie Mellon University; Rice University; Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;rice.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;rice.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/605ff764c617d3cd28dbbdd72be8f9a2-Abstract.html", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Carnegie Mellon University;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.rice.edu", "aff_unique_abbr": "CMU;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, 
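For intuition, here is a bare-bones sketch of the single-fidelity GP-UCB loop that MF-GP-UCB generalizes: fit a GP posterior and repeatedly query the point maximizing an upper confidence bound. The grid search, RBF kernel, and fixed beta are all illustrative simplifications, not the paper's multi-fidelity algorithm.

```python
# Hedged sketch: plain GP-UCB on a 1-D grid with an RBF kernel.
import numpy as np

def rbf(A, B, ls=0.2):
    return np.exp(-((A[:, None] - B[None, :]) ** 2) / (2 * ls**2))

f = lambda x: np.sin(6 * x) + 0.5 * x        # stand-in expensive black-box function
grid = np.linspace(0, 1, 200)
X, y = [0.5], [f(0.5)]
noise, beta = 1e-4, 2.0
for _ in range(15):
    Xa = np.array(X)
    K = rbf(Xa, Xa) + noise * np.eye(len(Xa))
    Ks = rbf(grid, Xa)
    mu = Ks @ np.linalg.solve(K, np.array(y))                       # posterior mean
    var = 1.0 - np.einsum('ij,ji->i', Ks, np.linalg.solve(K, Ks.T)) # posterior variance
    x_next = grid[np.argmax(mu + beta * np.sqrt(np.maximum(var, 0)))]
    X.append(x_next)
    y.append(f(x_next))

print("best query:", X[int(np.argmax(y))])
```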
{ "title": "Gaussian Processes for Survival Analysis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7051", "id": "7051", "author_site": "Tamara Fernandez, Nicolas Rivera, Yee Whye Teh", "author": "Tamara Fernandez; Nicolas Rivera; Yee Whye Teh", "abstract": "We introduce a semi-parametric Bayesian model for survival analysis. The model is centred on a parametric baseline hazard, and uses a Gaussian process to model variations away from it nonparametrically, as well as dependence on covariates. As opposed to many other methods in survival analysis, our framework does not impose unnecessary constraints in the hazard rate or in the survival function. Furthermore, our model handles left, right and interval censoring mechanisms common in survival analysis. We propose an MCMC algorithm to perform inference and an approximation scheme based on random Fourier features to make computations faster. We report experimental results on synthetic and real data, showing that our model performs better than competing models such as Cox proportional hazards, ANOVA-DDP and random survival forests.", "bibtex": "@inproceedings{NIPS2016_ef1e491a,\n author = {Fernandez, Tamara and Rivera, Nicolas and Teh, Yee Whye},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gaussian Processes for Survival Analysis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ef1e491a766ce3127556063d49bc2f98-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ef1e491a766ce3127556063d49bc2f98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ef1e491a766ce3127556063d49bc2f98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ef1e491a766ce3127556063d49bc2f98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ef1e491a766ce3127556063d49bc2f98-Reviews.html", "metareview": "", "pdf_size": 704482, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1003003481370784505&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Statistics, University of Oxford; Department of Informatics, King\u2019s College London; Department of Statistics, University of Oxford", "aff_domain": "stats.ox.ac.uk;kcl.ac.uk;stats.ox.ac.uk", "email": "stats.ox.ac.uk;kcl.ac.uk;stats.ox.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ef1e491a766ce3127556063d49bc2f98-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oxford;King\u2019s College London", "aff_unique_dep": "Department of Statistics;Department of Informatics", "aff_unique_url": "https://www.ox.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "Oxford;KCL", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Oxford;London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "General Tensor Spectral Co-clustering for Higher-Order Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7362", "id": "7362", "author_site": "Tao Wu, Austin Benson, David Gleich", "author": "Tao Wu; Austin R Benson; David F Gleich", "abstract": "Spectral clustering and co-clustering are well-known techniques in data analysis, and recent work has extended spectral clustering to square, symmetric tensors and hypermatrices derived from a network. We develop a new tensor spectral co-clustering method that simultaneously clusters the rows, columns, and slices of a nonnegative three-mode tensor and generalizes to tensors with any number of modes. The algorithm is based on a new random walk model which we call the super-spacey random surfer. We show that our method out-performs state-of-the-art co-clustering methods on several synthetic datasets with ground truth clusters and then use the algorithm to analyze several real-world datasets.", "bibtex": "@inproceedings{NIPS2016_fe51510c,\n author = {Wu, Tao and Benson, Austin R and Gleich, David F},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {General Tensor Spectral Co-clustering for Higher-Order Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fe51510c80bfd6e5d78a164cd5b1f688-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fe51510c80bfd6e5d78a164cd5b1f688-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fe51510c80bfd6e5d78a164cd5b1f688-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fe51510c80bfd6e5d78a164cd5b1f688-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fe51510c80bfd6e5d78a164cd5b1f688-Reviews.html", "metareview": "", "pdf_size": 851764, "gs_citation": 81, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5461843776516839837&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Purdue University; Stanford University; Purdue University", "aff_domain": "purdue.edu;stanford.edu;purdue.edu", "email": "purdue.edu;stanford.edu;purdue.edu", "github": "https://github.com/wutao27/GtensorSC", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fe51510c80bfd6e5d78a164cd5b1f688-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Purdue University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://www.stanford.edu", "aff_unique_abbr": "Purdue;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Generalization of ERM in Stochastic Convex Optimization: The Dimension Strikes Back", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7418", "id": "7418", "author": "Vitaly Feldman", "abstract": "In stochastic convex optimization the goal is to minimize a convex function $F(x) \\doteq \\E_{f\\sim D}[f(x)]$ over a convex set $\\K \\subset \\R^d$ where $D$ is some unknown distribution and each $f(\\cdot)$ in the support of $D$ is convex over $\\K$. The optimization is based on i.i.d.~samples $f^1,f^2,\\ldots,f^n$ from $D$. A common approach to such problems is empirical risk minimization (ERM) that optimizes $F_S(x) \\doteq \\frac{1}{n}\\sum_{i\\leq n} f^i(x)$. Here we consider the question of how many samples are necessary for ERM to succeed and the closely related question of uniform convergence of $F_S$ to $F$ over $\\K$. We demonstrate that in the standard $\\ell_p/\\ell_q$ setting of Lipschitz-bounded functions over a $\\K$ of bounded radius, ERM requires sample size that scales linearly with the dimension $d$. This nearly matches standard upper bounds and improves on $\\Omega(\\log d)$ dependence proved for $\\ell_2/\\ell_2$ setting in (Shalev-Shwartz et al. 2009). In stark contrast, these problems can be solved using dimension-independent number of samples for $\\ell_2/\\ell_2$ setting and $\\log d$ dependence for $\\ell_1/\\ell_\\infty$ setting using other approaches. We also demonstrate that for a more general class of range-bounded (but not Lipschitz-bounded) stochastic convex programs an even stronger gap appears already in dimension 2.", "bibtex": "@inproceedings{NIPS2016_8c01a759,\n author = {Feldman, Vitaly},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalization of ERM in Stochastic Convex Optimization: The Dimension Strikes Back},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8c01a75941549a705cf7275e41b21f0d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8c01a75941549a705cf7275e41b21f0d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8c01a75941549a705cf7275e41b21f0d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8c01a75941549a705cf7275e41b21f0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8c01a75941549a705cf7275e41b21f0d-Reviews.html", "metareview": "", "pdf_size": 410520, "gs_citation": 76, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8383572231074099090&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "IBM Research \u2013 Almaden", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8c01a75941549a705cf7275e41b21f0d-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "IBM", "aff_unique_dep": "IBM Research", "aff_unique_url": "https://www.ibm.com/research", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "0", "aff_campus_unique": "Almaden", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Generalized Correspondence-LDA Models (GC-LDA) for Identifying Functional Regions in the Brain", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7234", "id": "7234", "author_site": "Timothy Rubin, Sanmi Koyejo, Michael Jones, Tal Yarkoni", "author": "Timothy Rubin; Oluwasanmi O Koyejo; Michael N. Jones; Tal Yarkoni", "abstract": "This paper presents Generalized Correspondence-LDA (GC-LDA), a generalization of the Correspondence-LDA model that allows for variable spatial representations to be associated with topics, and increased flexibility in terms of the strength of the correspondence between data types induced by the model. We present three variants of GC-LDA, each of which associates topics with a different spatial representation, and apply them to a corpus of neuroimaging data. In the context of this dataset, each topic corresponds to a functional brain region, where the region's spatial extent is captured by a probability distribution over neural activity, and the region's cognitive function is captured by a probability distribution over linguistic terms. We illustrate the qualitative improvements offered by GC-LDA in terms of the types of topics extracted with alternative spatial representations, as well as the model's ability to incorporate a-priori knowledge from the neuroimaging literature. We furthermore demonstrate that the novel features of GC-LDA improve predictions for missing data.", "bibtex": "@inproceedings{NIPS2016_6a10bbd4,\n author = {Rubin, Timothy and Koyejo, Oluwasanmi O and Jones, Michael N and Yarkoni, Tal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generalized Correspondence-LDA Models (GC-LDA) for Identifying Functional Regions in the Brain},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6a10bbd480e4c5573d8f3af73ae0454b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6a10bbd480e4c5573d8f3af73ae0454b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6a10bbd480e4c5573d8f3af73ae0454b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6a10bbd480e4c5573d8f3af73ae0454b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6a10bbd480e4c5573d8f3af73ae0454b-Reviews.html", "metareview": "", "pdf_size": 9361222, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12920092086205009152&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "SurveyMonkey; Univ. of Illinois, Urbana-Champaign; Indiana University; University of Texas at Austin", "aff_domain": "; ; ; ", "email": "; ; ; ", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6a10bbd480e4c5573d8f3af73ae0454b-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "SurveyMonkey;University of Illinois Urbana-Champaign;Indiana University;University of Texas at Austin", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.surveymonkey.com;https://illinois.edu;https://www.indiana.edu;https://www.utexas.edu", "aff_unique_abbr": "SurveyMonkey;UIUC;IU;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Urbana-Champaign;Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Generating Images with Perceptual Similarity Metrics based on Deep Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6912", "id": "6912", "author_site": "Alexey Dosovitskiy, Thomas Brox", "author": "Alexey Dosovitskiy; Thomas Brox", "abstract": "We propose a class of loss functions, which we call deep perceptual similarity metrics (DeePSiM), allowing us to generate sharp, high-resolution images from compressed abstract representations. Instead of computing distances in the image space, we compute distances between image features extracted by deep neural networks. This metric reflects perceptual similarity of images much better and, thus, leads to better results. We demonstrate two examples of use cases of the proposed loss: (1) networks that invert the AlexNet convolutional network; (2) a modified version of a variational autoencoder that generates realistic high-resolution random images.", 
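The loss idea is simple to sketch: measure the distance between fixed nonlinear features of two images rather than between raw pixels. The random two-layer comparator below is a deliberately tiny stand-in for a pretrained network such as AlexNet; the function names and the pixel-space stabilizer weight are illustrative assumptions, not the paper's exact objective.

```python
# Toy sketch of a DeePSiM-style loss: feature-space distance under a fixed
# nonlinear comparator, plus a small pixel-space term for stability.
import numpy as np

rng = np.random.default_rng(0)
W1 = rng.normal(size=(256, 64 * 64)) / 64   # fixed random "layer 1"
W2 = rng.normal(size=(128, 256)) / 16       # fixed random "layer 2"

def features(img):
    h = np.maximum(W1 @ img.ravel(), 0)     # ReLU hidden layer
    return W2 @ h

def deepsim_like_loss(generated, target, lam_pix=0.1):
    feat = np.sum((features(generated) - features(target)) ** 2)
    pix = np.sum((generated - target) ** 2)
    return feat + lam_pix * pix

a, b = rng.random((64, 64)), rng.random((64, 64))
print(deepsim_like_loss(a, b))
```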
"bibtex": "@inproceedings{NIPS2016_371bce7d,\n author = {Dosovitskiy, Alexey and Brox, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generating Images with Perceptual Similarity Metrics based on Deep Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/371bce7dc83817b7893bcdeed13799b5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/371bce7dc83817b7893bcdeed13799b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/371bce7dc83817b7893bcdeed13799b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/371bce7dc83817b7893bcdeed13799b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/371bce7dc83817b7893bcdeed13799b5-Reviews.html", "metareview": "", "pdf_size": 2709982, "gs_citation": 1422, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1915913556489044934&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "aff": "University of Freiburg; University of Freiburg", "aff_domain": "cs.uni-freiburg.de;cs.uni-freiburg.de", "email": "cs.uni-freiburg.de;cs.uni-freiburg.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/371bce7dc83817b7893bcdeed13799b5-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Freiburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-freiburg.de", "aff_unique_abbr": "UoF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "title": "Generating Long-term Trajectories Using Deep Hierarchical Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6932", "id": "6932", "author_site": "Stephan Zheng, Yisong Yue, Patrick Lucey", "author": "Stephan Zheng; Yisong Yue; Jennifer Hobbs", "abstract": "We study the problem of modeling spatiotemporal trajectories over long time horizons using expert demonstrations. For instance, in sports, agents often choose action sequences with long-term goals in mind, such as achieving a certain strategic position. Conventional policy learning approaches, such as those based on Markov decision processes, generally fail at learning cohesive long-term behavior in such high-dimensional state spaces, and are only effective when fairly myopic decision-making yields the desired behavior. The key difficulty is that conventional models are ``single-scale'' and only learn a single state-action policy. We instead propose a hierarchical policy class that automatically reasons about both long-term and short-term goals, which we instantiate as a hierarchical neural network. We showcase our approach in a case study on learning to imitate demonstrated basketball trajectories, and show that it generates significantly more realistic trajectories compared to non-hierarchical baselines as judged by professional sports analysts.", "bibtex": "@inproceedings{NIPS2016_fe8c15fe,\n author = {Zheng, Stephan and Yue, Yisong and Hobbs, Jennifer},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generating Long-term Trajectories Using Deep Hierarchical Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fe8c15fed5f808006ce95eddb7366e35-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fe8c15fed5f808006ce95eddb7366e35-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fe8c15fed5f808006ce95eddb7366e35-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fe8c15fed5f808006ce95eddb7366e35-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fe8c15fed5f808006ce95eddb7366e35-Reviews.html", "metareview": "", "pdf_size": 958839, "gs_citation": 130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7713013798931402092&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Caltech; Caltech; STATS", "aff_domain": "caltech.edu;caltech.edu;stats.com", "email": "caltech.edu;caltech.edu;stats.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fe8c15fed5f808006ce95eddb7366e35-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "California Institute of Technology;Statistics", "aff_unique_dep": ";", "aff_unique_url": "https://www.caltech.edu;", "aff_unique_abbr": "Caltech;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pasadena;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Generating Videos with Scene Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7257", "id": "7257", "author_site": "Carl Vondrick, Hamed Pirsiavash, Antonio Torralba", "author": "Carl Vondrick; Hamed Pirsiavash; Antonio Torralba", "abstract": "We capitalize on large amounts of unlabeled video in order to learn a model of scene dynamics for both video recognition tasks (e.g. action classification) and video generation tasks (e.g. future prediction). We propose a generative adversarial network for video with a spatio-temporal convolutional architecture that untangles the scene's foreground from the background. Experiments suggest this model can generate tiny videos up to a second at full frame rate better than simple baselines, and we show its utility at predicting plausible futures of static images. Moreover, experiments and visualizations show the model internally learns useful features for recognizing actions with minimal supervision, suggesting scene dynamics are a promising signal for representation learning. We believe generative video models can impact many applications in video understanding and simulation.", "bibtex": "@inproceedings{NIPS2016_04025959,\n author = {Vondrick, Carl and Pirsiavash, Hamed and Torralba, Antonio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generating Videos with Scene Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/04025959b191f8f9de3f924f0940515f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/04025959b191f8f9de3f924f0940515f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/04025959b191f8f9de3f924f0940515f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/04025959b191f8f9de3f924f0940515f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/04025959b191f8f9de3f924f0940515f-Reviews.html", "metareview": "", "pdf_size": 1981581, "gs_citation": 1890, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12629733064507558057&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "MIT; UMBC; MIT", "aff_domain": "mit.edu;umbc.edu;mit.edu", "email": "mit.edu;umbc.edu;mit.edu", "github": "", "project": "http://mit.edu/vondrick/tinyvideo", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/04025959b191f8f9de3f924f0940515f-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;University of Maryland, Baltimore County", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.umbc.edu", "aff_unique_abbr": "MIT;UMBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Generative Adversarial Imitation Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7073", "id": "7073", "author_site": "Jonathan Ho, Stefano Ermon", "author": "Jonathan Ho; Stefano Ermon", "abstract": "Consider learning a policy from example expert behavior, without interaction with the expert or access to a reinforcement signal. One approach is to recover the expert's cost function with inverse reinforcement learning, then extract a policy from that cost function with reinforcement learning. This approach is indirect and can be slow. We propose a new general framework for directly extracting a policy from data as if it were obtained by reinforcement learning following inverse reinforcement learning. We show that a certain instantiation of our framework draws an analogy between imitation learning and generative adversarial networks, from which we derive a model-free imitation learning algorithm that obtains significant performance gains over existing model-free methods in imitating complex behaviors in large, high-dimensional environments.", "bibtex": "@inproceedings{NIPS2016_cc7e2b87,\n author = {Ho, Jonathan and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generative Adversarial Imitation Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cc7e2b878868cbae992d1fb743995d8f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cc7e2b878868cbae992d1fb743995d8f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/cc7e2b878868cbae992d1fb743995d8f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cc7e2b878868cbae992d1fb743995d8f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cc7e2b878868cbae992d1fb743995d8f-Reviews.html", "metareview": "", "pdf_size": 443459, "gs_citation": 4130, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9944023855119495996&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "OpenAI; Stanford University", "aff_domain": "openai.com;cs.stanford.edu", "email": "openai.com;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cc7e2b878868cbae992d1fb743995d8f-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "OpenAI;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://openai.com;https://www.stanford.edu", "aff_unique_abbr": "OpenAI;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Generative Shape Models: Joint Text Recognition and Segmentation with Very Little Training Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7314", "id": "7314", "author_site": "Xinghua Lou, Ken Kansky, Wolfgang Lehrach, CC Laan, Bhaskara Marthi, D. Phoenix, Dileep George", "author": "Xinghua Lou; Ken Kansky; Wolfgang Lehrach; CC Laan; Bhaskara Marthi; D. Phoenix; Dileep George", "abstract": "We demonstrate that a generative model for object shapes can achieve state of the art results on challenging scene text recognition tasks, and with orders of magnitude fewer training images than required for competing discriminative methods. In addition to transcribing text from challenging images, our method performs fine-grained instance segmentation of characters. We show that our model is more robust to both affine transformations and non-affine deformations compared to previous approaches.", "bibtex": "@inproceedings{NIPS2016_23ad3e31,\n author = {Lou, Xinghua and Kansky, Ken and Lehrach, Wolfgang and Laan, CC and Marthi, Bhaskara and Phoenix, D. and George, Dileep},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Generative Shape Models: Joint Text Recognition and Segmentation with Very Little Training Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/23ad3e314e2a2b43b4c720507cec0723-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/23ad3e314e2a2b43b4c720507cec0723-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/23ad3e314e2a2b43b4c720507cec0723-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/23ad3e314e2a2b43b4c720507cec0723-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/23ad3e314e2a2b43b4c720507cec0723-Reviews.html", "metareview": "", "pdf_size": 5264376, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11924631028594426761&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA; Vicarious FPC Inc., San Francisco, USA", "aff_domain": "vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com", "email": "vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com;vicarious.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/23ad3e314e2a2b43b4c720507cec0723-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Vicarious FPC Inc.", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "San Francisco", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Geometric Dirichlet Means Algorithm for topic inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6988", "id": "6988", "author_site": "Mikhail Yurochkin, XuanLong Nguyen", "author": "Mikhail Yurochkin; Xuanlong Nguyen", "abstract": "We propose a geometric algorithm for topic learning and inference that is built on the convex geometry of topics arising from the Latent Dirichlet Allocation (LDA) model and its nonparametric extensions. To this end we study the optimization of a geometric loss function, which is a surrogate to the LDA's likelihood. Our method involves a fast optimization based weighted clustering procedure augmented with geometric corrections, which overcomes the computational and statistical inefficiencies encountered by other techniques based on Gibbs sampling and variational inference, while achieving the accuracy comparable to that of a Gibbs sampler. The topic estimates produced by our method are shown to be statistically consistent under some conditions. The algorithm is evaluated with extensive experiments on simulated and real data.", "bibtex": "@inproceedings{NIPS2016_a0872cc5,\n author = {Yurochkin, Mikhail and Nguyen, XuanLong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Geometric Dirichlet Means Algorithm for topic inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a0872cc5b5ca4cc25076f3d868e1bdf8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a0872cc5b5ca4cc25076f3d868e1bdf8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a0872cc5b5ca4cc25076f3d868e1bdf8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a0872cc5b5ca4cc25076f3d868e1bdf8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a0872cc5b5ca4cc25076f3d868e1bdf8-Reviews.html", "metareview": "", "pdf_size": 458456, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11210118851383858616&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Statistics, University of Michigan; Department of Statistics, University of Michigan", "aff_domain": "umich.edu;umich.edu", "email": "umich.edu;umich.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a0872cc5b5ca4cc25076f3d868e1bdf8-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Global Analysis of Expectation Maximization for Mixtures of Two Gaussians", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7216", "id": "7216", "author_site": "Ji Xu, Daniel Hsu, Arian Maleki", "author": "Ji Xu; Daniel J. Hsu; Arian Maleki", "abstract": "Expectation Maximization (EM) is among the most popular algorithms for estimating parameters of statistical models. However, EM, which is an iterative algorithm based on the maximum likelihood principle, is generally only guaranteed to find stationary points of the likelihood objective, and these points may be far from any maximizer. This article addresses this disconnect between the statistical principles behind EM and its algorithmic properties. Specifically, it provides a global analysis of EM for specific models in which the observations comprise an i.i.d. sample from a mixture of two Gaussians. This is achieved by (i) studying the sequence of parameters from idealized execution of EM in the infinite sample limit, and fully characterizing the limit points of the sequence in terms of the initial parameters; and then (ii) based on this convergence analysis, establishing statistical consistency (or lack thereof) for the actual sequence of parameters produced by EM.", 
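The idealized setting analyzed here is easy to simulate. A minimal EM sketch, assuming a balanced mixture of two 1-D Gaussians with known unit variances so that only the two means are updated; which limit point the iterates reach depends on the initialization, which is exactly the sensitivity the analysis characterizes.

```python
# Hedged sketch: EM for a balanced two-component Gaussian mixture with known
# unit variances, updating only the component means.
import numpy as np

rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(-2, 1, 500), rng.normal(2, 1, 500)])

mu = np.array([-0.5, 0.1])                 # initialization determines the limit point
for _ in range(100):
    # E-step: posterior responsibilities under equal weights and unit variance.
    log_r = -0.5 * (x[:, None] - mu[None, :]) ** 2
    r = np.exp(log_r - log_r.max(axis=1, keepdims=True))
    r /= r.sum(axis=1, keepdims=True)
    # M-step: responsibility-weighted means.
    mu = (r * x[:, None]).sum(axis=0) / r.sum(axis=0)

print(mu)   # close to (-2, 2) for this initialization
```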
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Analysis of Expectation Maximization for Mixtures of Two Gaussians},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/792c7b5aae4a79e78aaeda80516ae2ac-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/792c7b5aae4a79e78aaeda80516ae2ac-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/792c7b5aae4a79e78aaeda80516ae2ac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/792c7b5aae4a79e78aaeda80516ae2ac-Reviews.html", "metareview": "", "pdf_size": 285604, "gs_citation": 173, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15873504364511001497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Columbia University; Columbia University; Columbia University", "aff_domain": "cs.columbia.edu;cs.columbia.edu;stat.columbia.edu", "email": "cs.columbia.edu;cs.columbia.edu;stat.columbia.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/792c7b5aae4a79e78aaeda80516ae2ac-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Global Optimality of Local Search for Low Rank Matrix Recovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7053", "id": "7053", "author_site": "Srinadh Bhojanapalli, Behnam Neyshabur, Nati Srebro", "author": "Srinadh Bhojanapalli; Behnam Neyshabur; Nati Srebro", "abstract": "We show that there are no spurious local minima in the non-convex factorized parametrization of low-rank matrix recovery from incoherent linear measurements. With noisy measurements we show all local minima are very close to a global optimum. Together with a curvature bound at saddle points, this yields a polynomial time global convergence guarantee for stochastic gradient descent {\\em from random initialization}.", "bibtex": "@inproceedings{NIPS2016_b139e104,\n author = {Bhojanapalli, Srinadh and Neyshabur, Behnam and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
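The abstract's claim concerns gradient-type local search on the factorized objective; the sketch below sets up that objective for random Gaussian measurements and runs plain gradient descent from a small random initialization. Problem sizes, step size, and iteration count are arbitrary choices for illustration, not the paper's experimental setup.

```python
import numpy as np
rng = np.random.default_rng(0)

n, r, m = 20, 2, 400                       # dimension, rank, measurements
Ustar = rng.normal(size=(n, r))
Mstar = Ustar @ Ustar.T                    # ground-truth PSD rank-r matrix
A = rng.normal(size=(m, n, n))
A_sym = A + A.transpose(0, 2, 1)
b = np.einsum('mij,ij->m', A, Mstar)       # linear measurements <A_i, M*>

U = 0.1 * rng.normal(size=(n, r))          # small random initialization
step = 2e-3
for _ in range(5000):
    res = np.einsum('mij,ij->m', A, U @ U.T) - b
    grad = (np.einsum('m,mij->ij', res, A_sym) / m) @ U   # approx 2(UU^T - M*)U
    U -= step * grad
print(np.linalg.norm(U @ U.T - Mstar) / np.linalg.norm(Mstar))  # small
```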
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Global Optimality of Local Search for Low Rank Matrix Recovery},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b139e104214a08ae3f2ebcce149cdf6e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b139e104214a08ae3f2ebcce149cdf6e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b139e104214a08ae3f2ebcce149cdf6e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b139e104214a08ae3f2ebcce149cdf6e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b139e104214a08ae3f2ebcce149cdf6e-Reviews.html", "metareview": "", "pdf_size": 588645, "gs_citation": 475, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13377495735395555335&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Toyota Technological Institute at Chicago; Toyota Technological Institute at Chicago; Toyota Technological Institute at Chicago", "aff_domain": "ttic.edu;ttic.edu;ttic.edu", "email": "ttic.edu;ttic.edu;ttic.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b139e104214a08ae3f2ebcce149cdf6e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Toyota Technological Institute at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.tti-chicago.org", "aff_unique_abbr": "TTI Chicago", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Globally Optimal Training of Generalized Polynomial Neural Networks with Nonlinear Spectral Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7131", "id": "7131", "author_site": "Antoine Gautier, Quynh Nguyen, Matthias Hein", "author": "Antoine Gautier; Quynh N Nguyen; Matthias Hein", "abstract": "The optimization problem behind neural networks is highly non-convex. Training with stochastic gradient descent and variants requires careful parameter tuning and provides no guarantee to achieve the global optimum. In contrast, we show under quite weak assumptions on the data that a particular class of feedforward neural networks can be trained to global optimality with a linear convergence rate. To our knowledge, this is the first practically feasible method which achieves such a guarantee. While the method can in principle be applied to deep networks, we restrict ourselves in this paper, for simplicity, to one- and two-hidden-layer networks. Our experiments confirm that these models are already rich enough to achieve good performance on a series of real-world datasets.", "bibtex": "@inproceedings{NIPS2016_1f4477ba,\n author = {Gautier, Antoine and Nguyen, Quynh N and Hein, Matthias},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Globally Optimal Training of Generalized Polynomial Neural Networks with Nonlinear Spectral Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1f4477bad7af3616c1f933a02bfabe4e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1f4477bad7af3616c1f933a02bfabe4e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1f4477bad7af3616c1f933a02bfabe4e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1f4477bad7af3616c1f933a02bfabe4e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1f4477bad7af3616c1f933a02bfabe4e-Reviews.html", "metareview": "", "pdf_size": 689064, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10899432271016885718&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1f4477bad7af3616c1f933a02bfabe4e-Abstract.html" }, { "title": "Gradient-based Sampling: An Adaptive Importance Sampling for Least-squares", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6935", "id": "6935", "author": "Rong Zhu", "abstract": "In modern data analysis, random sampling is an efficient and widely-used strategy to overcome the computational difficulties brought by large sample size. In previous studies, researchers conducted random sampling according to the input data but independent of the response variable; however, the response variable may also be informative for sampling. In this paper we propose an adaptive sampling method, called gradient-based sampling, which depends on both the input data and the output, for fast solving of least-squares (LS) problems. We draw the data points by random sampling from the full data according to their gradient values. This sampling is computationally cheap, since the running time of computing the sampling probabilities is reduced to O(nd), where n is the full sample size and d is the dimension of the input. Theoretically, we establish an error-bound analysis of general importance sampling with respect to the LS solution from the full data. The result establishes the improved performance of our gradient-based sampling. Synthetic and real data sets are used to show empirically that gradient-based sampling has a clear advantage over existing sampling methods in terms of both statistical efficiency and computational savings.", "bibtex": "@inproceedings{NIPS2016_9188905e,\n author = {Zhu, Rong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
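A minimal sketch of a gradient-based subsampling scheme in the spirit of the abstract: a pilot estimate gives residuals, sampling probabilities are taken proportional to per-point gradient norms |x_i'beta_0 - y_i| * ||x_i|| (an O(nd) computation), and a reweighted least-squares problem is solved on the subsample. The pilot-sample size and other constants are illustrative assumptions.

```python
import numpy as np
rng = np.random.default_rng(0)

n, d, m = 100000, 10, 1000
X = rng.normal(size=(n, d))
beta_true = rng.normal(size=d)
y = X @ beta_true + rng.normal(size=n)

# Pilot estimate from a small uniform subsample.
pilot = rng.choice(n, 500, replace=False)
beta0, *_ = np.linalg.lstsq(X[pilot], y[pilot], rcond=None)

# Gradient-based probabilities: p_i proportional to |residual_i| * ||x_i||.
g = np.abs(X @ beta0 - y) * np.linalg.norm(X, axis=1)
p = g / g.sum()

idx = rng.choice(n, m, replace=True, p=p)
w = 1.0 / np.sqrt(m * p[idx])            # importance weights for unbiasedness
beta_hat, *_ = np.linalg.lstsq(w[:, None] * X[idx], w * y[idx], rcond=None)
print(np.linalg.norm(beta_hat - beta_true))
```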
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Gradient-based Sampling: An Adaptive Importance Sampling for Least-squares},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9188905e74c28e489b44e954ec0b9bca-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9188905e74c28e489b44e954ec0b9bca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9188905e74c28e489b44e954ec0b9bca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9188905e74c28e489b44e954ec0b9bca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9188905e74c28e489b44e954ec0b9bca-Reviews.html", "metareview": "", "pdf_size": 310002, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13983762724183755772&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Beijing, China", "aff_domain": "amss.ac.cn", "email": "amss.ac.cn", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9188905e74c28e489b44e954ec0b9bca-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Academy of Mathematics and Systems Science", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Graph Clustering: Block-models and model free results", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6891", "id": "6891", "author_site": "Yali Wan, Marina Meila", "author": "Yali Wan; Marina Meila", "abstract": "Clustering graphs under the Stochastic Block Model (SBM) and extensions are well studied. Guarantees of correctness exist under the assumption that the data is sampled from a model. In this paper, we propose a framework, in which we obtain \"correctness\" guarantees without assuming the data comes from a model. The guarantees we obtain depend instead on the statistics of the data that can be checked. We also show that this framework ties in with the existing model-based framework, and that we can exploit results in model-based recovery, as well as strengthen the results existing in that area of research.", "bibtex": "@inproceedings{NIPS2016_286674e3,\n author = {Wan, Yali and Meila, Marina},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graph Clustering: Block-models and model free results},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/286674e3082feb7e5afb92777e48821f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/286674e3082feb7e5afb92777e48821f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/286674e3082feb7e5afb92777e48821f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/286674e3082feb7e5afb92777e48821f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/286674e3082feb7e5afb92777e48821f-Reviews.html", "metareview": "", "pdf_size": 573072, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12486694793706221496&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Statistics, University of Washington; Department of Statistics, University of Washington", "aff_domain": "washington.edu;stat.washington.edu", "email": "washington.edu;stat.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/286674e3082feb7e5afb92777e48821f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Graphical Time Warping for Joint Alignment of Multiple Curves", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7133", "id": "7133", "author_site": "Yizhi Wang, David J Miller, Kira Poskanzer, Yue Wang, Lin Tian, Guoqiang Yu", "author": "Yizhi Wang; David J. Miller; Kira Poskanzer; Yue Wang; Lin Tian; Guoqiang Yu", "abstract": "Dynamic time warping (DTW) is a fundamental technique in time series analysis for comparing one curve to another using a flexible time-warping function. However, it was designed to compare a single pair of curves. In many applications, such as in metabolomics and image series analysis, alignment is simultaneously needed for multiple pairs. Because the underlying warping functions are often related, independent application of DTW to each pair is a sub-optimal solution. Yet, it is largely unknown how to efficiently conduct a joint alignment with all warping functions simultaneously considered, since any given warping function is constrained by the others and dynamic programming cannot be applied. In this paper, we show that the joint alignment problem can be transformed into a network flow problem and thus can be exactly and efficiently solved by the max flow algorithm, with a guarantee of global optimality. We name the proposed approach graphical time warping (GTW), emphasizing the graphical nature of the solution and that the dependency structure of the warping functions can be represented by a graph. Modifications of DTW, such as windowing and weighting, are readily derivable within GTW. We also discuss optimal tuning of parameters and hyperparameters in GTW. 
We illustrate the power of GTW using both synthetic data and a real case study of an astrocyte calcium movie.", "bibtex": "@inproceedings{NIPS2016_f0bbac6f,\n author = {Wang, Yizhi and Miller, David J and Poskanzer, Kira and Wang, Yue and Tian, Lin and Yu, Guoqiang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graphical Time Warping for Joint Alignment of Multiple Curves},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Reviews.html", "metareview": "", "pdf_size": 4663046, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14397288071128585646&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Virginia Tech; Pennsylvania State University; University of California, San Francisco; Virginia Tech; University of California, Davis; Virginia Tech", "aff_domain": "vt.edu;engr.psu.edu;ucsf.edu;vt.edu;ucdavis.edu;vt.edu", "email": "vt.edu;engr.psu.edu;ucsf.edu;vt.edu;ucdavis.edu;vt.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f0bbac6fa079f1e00b2c14c1d3c6ccf0-Abstract.html", "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Virginia Tech;Pennsylvania State University;University of California, San Francisco;University of California, Davis", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.vt.edu;https://www.psu.edu;https://www.ucsf.edu;https://www.ucdavis.edu", "aff_unique_abbr": "VT;PSU;UCSF;UC Davis", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";San Francisco;Davis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Graphons, mergeons, and so on!", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7213", "id": "7213", "author_site": "Justin Eldridge, Mikhail Belkin, Yusu Wang", "author": "Justin Eldridge; Mikhail Belkin; Yusu Wang", "abstract": "In this work we develop a theory of hierarchical clustering for graphs. Our modelling assumption is that graphs are sampled from a graphon, which is a powerful and general model for generating graphs and analyzing large networks. Graphons are a far richer class of graph models than stochastic blockmodels, the primary setting for recent progress in the statistical theory of graph clustering. We define what it means for an algorithm to produce the ``correct\" clustering, give sufficient conditions in which a method is statistically consistent, and provide an explicit algorithm satisfying these properties.", "bibtex": "@inproceedings{NIPS2016_a42a596f,\n author = {Eldridge, Justin and Belkin, Mikhail and Wang, Yusu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
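Since the Graphons paper's modelling assumption is that graphs are sampled from a graphon, a short generator makes the setting concrete: draw latent uniforms u_i and connect i, j independently with probability W(u_i, u_j). The example graphon below is an arbitrary smooth choice for illustration.

```python
import numpy as np

def sample_graphon(W, n, seed=0):
    # Draw a graph G(n, W): latent u_i ~ Uniform[0,1], then edge {i,j}
    # appears independently with probability W(u_i, u_j).
    rng = np.random.default_rng(seed)
    u = rng.uniform(size=n)
    P = W(u[:, None], u[None, :])
    A = (rng.uniform(size=(n, n)) < P).astype(int)
    A = np.triu(A, 1)                     # keep one draw per unordered pair
    return A + A.T, u

# Example: a smooth graphon that favors edges between similar latent positions.
A, u = sample_graphon(lambda x, y: 0.8 * np.exp(-3 * np.abs(x - y)), n=200)
```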
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Graphons, mergeons, and so on!},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a42a596fc71e17828440030074d15e74-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a42a596fc71e17828440030074d15e74-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a42a596fc71e17828440030074d15e74-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a42a596fc71e17828440030074d15e74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a42a596fc71e17828440030074d15e74-Reviews.html", "metareview": "", "pdf_size": 148322, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16179913533871611440&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "The Ohio State University; The Ohio State University; The Ohio State University", "aff_domain": "cse.ohio-state.edu;cse.ohio-state.edu;cse.ohio-state.edu", "email": "cse.ohio-state.edu;cse.ohio-state.edu;cse.ohio-state.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a42a596fc71e17828440030074d15e74-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Greedy Feature Construction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6968", "id": "6968", "author_site": "Dino Oglic, Thomas G\u00e4rtner", "author": "Dino Oglic; Thomas G\u00e4rtner", "abstract": "We present an effective method for supervised feature construction. The main goal of the approach is to construct a feature representation for which a set of linear hypotheses is of sufficient capacity -- large enough to contain a satisfactory solution to the considered problem and small enough to allow good generalization from a small number of training examples. We achieve this goal with a greedy procedure that constructs features by empirically fitting squared error residuals. The proposed constructive procedure is consistent and can output a rich set of features. The effectiveness of the approach is evaluated empirically by fitting a linear ridge regression model in the constructed feature space and our empirical results indicate a superior performance of our approach over competing methods.", "bibtex": "@inproceedings{NIPS2016_277a78fc,\n author = {Oglic, Dino and G\\\"{a}rtner, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
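A minimal sketch of greedy feature construction by residual fitting, as described in the Greedy Feature Construction abstract: at each round a pool of random candidate features is scored against the current residual of a ridge model, the best one is kept, and the ridge fit is refreshed. The cosine feature family, candidate-pool size, and regularization constant are assumptions made for this sketch, not the paper's construction.

```python
import numpy as np
rng = np.random.default_rng(0)

def greedy_features(X, y, rounds=20, n_cand=200, lam=1e-2):
    n, d = X.shape
    feats, Z = [], np.ones((n, 1))                  # start with a bias column
    coef = np.linalg.solve(Z.T @ Z + lam * np.eye(1), Z.T @ y)
    for _ in range(rounds):
        resid = y - Z @ coef
        Wc = rng.normal(size=(n_cand, d))
        bc = rng.uniform(0, 2 * np.pi, n_cand)
        Phi = np.cos(X @ Wc.T + bc)                 # n x n_cand candidates
        scores = (Phi.T @ resid) ** 2 / (Phi ** 2).sum(axis=0)
        j = int(np.argmax(scores))                  # best fit to the residual
        feats.append((Wc[j], bc[j]))
        Z = np.hstack([Z, Phi[:, j:j + 1]])
        k = Z.shape[1]
        coef = np.linalg.solve(Z.T @ Z + lam * np.eye(k), Z.T @ y)
    return feats, coef

X = rng.normal(size=(500, 5))
y = np.sin(X[:, 0]) + X[:, 1] * X[:, 2]
feats, coef = greedy_features(X, y)
```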
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Greedy Feature Construction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/277a78fc05c8864a170e9a56ceeabc4c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/277a78fc05c8864a170e9a56ceeabc4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/277a78fc05c8864a170e9a56ceeabc4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/277a78fc05c8864a170e9a56ceeabc4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/277a78fc05c8864a170e9a56ceeabc4c-Reviews.html", "metareview": "", "pdf_size": 413713, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8333553148653468201&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Institut f\u00fcr Informatik III, Universit\u00e4t Bonn, Germany\u2021; School of Computer Science, The University of Nottingham, UK\u2020", "aff_domain": "uni-bonn.de;nottingham.ac.uk", "email": "uni-bonn.de;nottingham.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/277a78fc05c8864a170e9a56ceeabc4c-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e4t Bonn;University of Nottingham", "aff_unique_dep": "Institut f\u00fcr Informatik III;School of Computer Science", "aff_unique_url": "https://www.uni-bonn.de;https://www.nottingham.ac.uk", "aff_unique_abbr": ";Nottingham", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "Guided Policy Search via Approximate Mirror Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7385", "id": "7385", "author_site": "William H Montgomery, Sergey Levine", "author": "William H Montgomery; Sergey Levine", "abstract": "Guided policy search algorithms can be used to optimize complex nonlinear policies, such as deep neural networks, without directly computing policy gradients in the high-dimensional parameter space. Instead, these methods use supervised learning to train the policy to mimic a \u201cteacher\u201d algorithm, such as a trajectory optimizer or a trajectory-centric reinforcement learning method. Guided policy search methods provide asymptotic local convergence guarantees by construction, but it is not clear how much the policy improves within a small, finite number of iterations. We show that guided policy search algorithms can be interpreted as an approximate variant of mirror descent, where the projection onto the constraint manifold is not exact. We derive a new guided policy search algorithm that is simpler and provides appealing improvement and convergence guarantees in simplified convex and linear settings, and show that in the more general nonlinear setting, the error in the projection step can be bounded. We provide empirical results on several simulated robotic manipulation tasks that show that our method is stable and achieves similar or better performance when compared to prior guided policy search methods, with a simpler formulation and fewer hyperparameters.", "bibtex": "@inproceedings{NIPS2016_a00e5eb0,\n author = {Montgomery, William H and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
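The mirror-descent interpretation in the Guided Policy Search abstract is easier to see against exact mirror descent. The sketch below runs entropic mirror descent on a probability simplex, where the KL projection is exact (a softmax normalization); in GPS the analogous projection is the approximate supervised fitting of the policy to the teacher. This is a reference illustration of mirror descent itself, not of GPS.

```python
import numpy as np

def mirror_descent(grad_f, x0, step=0.5, iters=100):
    # Entropic mirror descent: gradient step in log space, then the exact
    # KL "projection" back onto the simplex (renormalization).
    x = x0.copy()
    for _ in range(iters):
        x = x * np.exp(-step * grad_f(x))
        x /= x.sum()
    return x

c = np.array([0.9, 0.5, 0.1, 0.7])          # per-action cost
x = mirror_descent(lambda x: c, np.full(4, 0.25))
print(x)                                     # mass concentrates on argmin cost
```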
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Guided Policy Search via Approximate Mirror Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a00e5eb0973d24649a4a920fc53d9564-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a00e5eb0973d24649a4a920fc53d9564-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a00e5eb0973d24649a4a920fc53d9564-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a00e5eb0973d24649a4a920fc53d9564-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a00e5eb0973d24649a4a920fc53d9564-Reviews.html", "metareview": "", "pdf_size": 497650, "gs_citation": 128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9926969780547331471&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Dept. of Computer Science and Engineering, University of Washington; Dept. of Computer Science and Engineering, University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu", "email": "cs.washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a00e5eb0973d24649a4a920fc53d9564-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hardness of Online Sleeping Combinatorial Optimization Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7136", "id": "7136", "author_site": "Satyen Kale, Chansoo Lee, David Pal", "author": "Satyen Kale; Chansoo Lee; David Pal", "abstract": "We show that several online combinatorial optimization problems that admit efficient no-regret algorithms become computationally hard in the sleeping setting where a subset of actions becomes unavailable in each round. Specifically, we show that the sleeping versions of these problems are at least as hard as PAC learning DNF expressions, a long standing open problem. We show hardness for the sleeping versions of Online Shortest Paths, Online Minimum Spanning Tree, Online k-Subsets, Online k-Truncated Permutations, Online Minimum Cut, and Online Bipartite Matching. The hardness result for the sleeping version of the Online Shortest Paths problem resolves an open problem presented at COLT 2015 [Koolen et al., 2015].", "bibtex": "@inproceedings{NIPS2016_18426034,\n author = {Kale, Satyen and Lee, Chansoo and Pal, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hardness of Online Sleeping Combinatorial Optimization Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/184260348236f9554fe9375772ff966e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/184260348236f9554fe9375772ff966e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/184260348236f9554fe9375772ff966e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/184260348236f9554fe9375772ff966e-Reviews.html", "metareview": "", "pdf_size": 325052, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12185456880002903923&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Yahoo Research + Google Research; Univ. of Michigan, Ann Arbor; Yahoo Research", "aff_domain": "satyenkale.com;umich.edu;yahoo-inc.com", "email": "satyenkale.com;umich.edu;yahoo-inc.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/184260348236f9554fe9375772ff966e-Abstract.html", "aff_unique_index": "0+1;2;0", "aff_unique_norm": "Yahoo;Google;University of Michigan", "aff_unique_dep": "Yahoo Research;Google Research;", "aff_unique_url": "https://research.yahoo.com;https://research.google;https://www.umich.edu", "aff_unique_abbr": "Yahoo Research;Google Research;UM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Ann Arbor", "aff_country_unique_index": "0+0;0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Clustering via Spreading Metrics", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7214", "id": "7214", "author_site": "Aurko Roy, Sebastian Pokutta", "author": "Aurko Roy; Sebastian Pokutta", "abstract": "We study the cost function for hierarchical clusterings introduced by [Dasgupta, 2015] where hierarchies are treated as first-class objects rather than deriving their cost from projections into flat clusters. It was also shown in [Dasgupta, 2015] that a top-down algorithm returns a hierarchical clustering of cost at most $O(\\alpha_n \\log n)$ times the cost of the optimal hierarchical clustering, where $\\alpha_n$ is the approximation ratio of the Sparsest Cut subroutine used.", "bibtex": "@inproceedings{NIPS2016_4d2e7bd3,\n author = {Roy, Aurko and Pokutta, Sebastian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
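For readers unfamiliar with the hierarchical clustering cost function being studied, the sketch below evaluates it directly: a pair i, j contributes its similarity w[i, j] times the number of leaves under the pair's lowest common ancestor, so similar pairs should be merged low in the tree. The tiny similarity matrix is illustrative.

```python
import numpy as np

def dasgupta_cost(tree, w):
    # tree: nested tuples of leaf indices, e.g. ((0, 1), (2, 3)).
    # cost(T) = sum over pairs {i, j} of w[i, j] * |leaves(lca(i, j))|.
    def leaves(t):
        return [t] if isinstance(t, int) else leaves(t[0]) + leaves(t[1])
    cost = 0.0
    def walk(t):
        nonlocal cost
        if isinstance(t, int):
            return
        L, R = leaves(t[0]), leaves(t[1])
        # pairs split at this node have their lowest common ancestor here
        cost += (len(L) + len(R)) * sum(w[i, j] for i in L for j in R)
        walk(t[0])
        walk(t[1])
    walk(tree)
    return cost

w = np.array([[0, 1, .1, .1],
              [1, 0, .1, .1],
              [.1, .1, 0, 1],
              [.1, .1, 1, 0]])
print(dasgupta_cost(((0, 1), (2, 3)), w))   # 5.6: similar pairs merged low
print(dasgupta_cost(((0, 2), (1, 3)), w))   # 9.2: a worse hierarchy costs more
```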
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Clustering via Spreading Metrics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4d2e7bd33c475784381a64e43e50922f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4d2e7bd33c475784381a64e43e50922f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4d2e7bd33c475784381a64e43e50922f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4d2e7bd33c475784381a64e43e50922f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4d2e7bd33c475784381a64e43e50922f-Reviews.html", "metareview": "", "pdf_size": 441967, "gs_citation": 106, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10711360637811604284&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "College of Computing, Georgia Institute of Technology, Atlanta, GA, USA; ISyE, Georgia Institute of Technology, Atlanta, GA, USA", "aff_domain": "gatech.edu;isye.gatech.edu", "email": "gatech.edu;isye.gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4d2e7bd33c475784381a64e43e50922f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "College of Computing", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hierarchical Deep Reinforcement Learning: Integrating Temporal Abstraction and Intrinsic Motivation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7266", "id": "7266", "author_site": "Tejas Kulkarni, Karthik Narasimhan, Ardavan Saeedi, Josh Tenenbaum", "author": "Tejas D Kulkarni; Karthik Narasimhan; Ardavan Saeedi; Josh Tenenbaum", "abstract": "Learning goal-directed behavior in environments with sparse feedback is a major challenge for reinforcement learning algorithms. One of the primary difficulties arises due to insufficient exploration, resulting in an agent being unable to learn robust policies. Intrinsically motivated agents can explore new behavior for their own sake rather than to directly solve external goals. Such intrinsic behaviors could eventually help the agent solve tasks posed by the environment. We present hierarchical-DQN (h-DQN), a framework to integrate hierarchical action-value functions, operating at different temporal scales, with goal-driven intrinsically motivated deep reinforcement learning. A top-level value function learns a policy over intrinsic goals, while a lower-level function learns a policy over atomic actions to satisfy the given goals. h-DQN allows for flexible goal specifications, such as functions over entities and relations. This provides an efficient space for exploration in complicated environments. We demonstrate the strength of our approach on two problems with very sparse and delayed feedback: (1) a complex discrete stochastic decision process, and (2) the classic ATARI game 'Montezuma's Revenge'.", "bibtex": "@inproceedings{NIPS2016_f442d33f,\n author = {Kulkarni, Tejas D and Narasimhan, Karthik and Saeedi, Ardavan and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
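A tabular caricature of the two-level decomposition described above, on a 6-state chain loosely modeled on the paper's discrete stochastic decision process: a meta-controller learns values over (state, goal) from extrinsic reward, while a controller learns values over (goal, state, action) from the intrinsic reward of reaching the chosen goal. Tables replace the deep networks, and all constants are illustrative.

```python
import numpy as np
rng = np.random.default_rng(0)

# Chain of 6 states; start at s1; returning to s0 pays 1.0 if s5 was visited,
# and only 0.01 otherwise, so the rewarding strategy needs deep exploration.
nS, nA, eps, alpha, gamma = 6, 2, 0.1, 0.1, 0.95
Q_meta = np.zeros((nS, nS))            # state x goal
Q_ctrl = np.zeros((nS, nS, nA))        # goal x state x action

def step(s, a):
    if a == 1 and rng.uniform() < 0.5:     # "right" succeeds half the time
        return min(s + 1, nS - 1)
    return max(s - 1, 0)

for episode in range(20000):
    s, visited = 1, False
    while s != 0:
        g = int(Q_meta[s].argmax()) if rng.uniform() > eps else int(rng.integers(nS))
        s0, ext = s, 0.0
        while s != g and s != 0:           # controller pursues goal g
            a = int(Q_ctrl[g, s].argmax()) if rng.uniform() > eps else int(rng.integers(nA))
            s2 = step(s, a)
            visited = visited or (s2 == nS - 1)
            r_ext = (1.0 if visited else 0.01) if s2 == 0 else 0.0
            r_int = 1.0 if s2 == g else 0.0   # intrinsic reward: goal reached
            Q_ctrl[g, s, a] += alpha * (r_int + gamma * Q_ctrl[g, s2].max() - Q_ctrl[g, s, a])
            ext += r_ext
            s = s2
        Q_meta[s0, g] += alpha * (ext + gamma * Q_meta[s].max() - Q_meta[s0, g])

print(Q_meta[1].argmax())   # with enough episodes the first goal tends toward s5
```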
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Deep Reinforcement Learning: Integrating Temporal Abstraction and Intrinsic Motivation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f442d33fa06832082290ad8544a8da27-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f442d33fa06832082290ad8544a8da27-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f442d33fa06832082290ad8544a8da27-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f442d33fa06832082290ad8544a8da27-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f442d33fa06832082290ad8544a8da27-Reviews.html", "metareview": "", "pdf_size": 1147961, "gs_citation": 1578, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17312997916185144890&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "DeepMind, London + CSAIL, MIT; CSAIL, MIT; CSAIL, MIT; BCS, MIT", "aff_domain": "gmail.com;mit.edu;mit.edu;mit.edu", "email": "gmail.com;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f442d33fa06832082290ad8544a8da27-Abstract.html", "aff_unique_index": "0+1;1;1;1", "aff_unique_norm": "DeepMind;Massachusetts Institute of Technology", "aff_unique_dep": ";Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://deepmind.com;https://www.csail.mit.edu", "aff_unique_abbr": "DeepMind;MIT", "aff_campus_unique_index": "0+1;1;1;1", "aff_campus_unique": "London;Cambridge", "aff_country_unique_index": "0+1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "title": "Hierarchical Object Representation for Open-Ended Object Category Learning and Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7196", "id": "7196", "author_site": "Seyed Hamidreza Kasaei, Ana Maria Tom\u00e9, Lu\u00eds Seabra Lopes", "author": "Seyed Hamidreza Kasaei; Ana Maria Tom\u00e9; Lu\u00eds Seabra Lopes", "abstract": "Most robots lack the ability to learn new objects from past experiences. To migrate a robot to a new environment one must often completely re-generate the knowledge base that it is running with. Since in open-ended domains the set of categories to be learned is not predefined, it is not feasible to assume that one can pre-program all object categories required by robots. Therefore, autonomous robots must have the ability to continuously execute learning and recognition in a concurrent and interleaved fashion. This paper proposes an open-ended 3D object recognition system which concurrently learns both the object categories and the statistical features for encoding objects. In particular, we propose an extension of Latent Dirichlet Allocation to learn structural semantic features (i.e. topics) from low-level feature co-occurrences for each category independently. Moreover, topics in each category are discovered in an unsupervised fashion and are updated incrementally using new object views. The approach bears similarities to the organization of the visual cortex and builds a hierarchy of increasingly sophisticated representations. Results show the promising performance of this approach on different types of objects. 
Moreover, this system demonstrates the capability of learning from few training examples and competes with state-of-the-art systems.", "bibtex": "@inproceedings{NIPS2016_299a23a2,\n author = {Kasaei, Seyed Hamidreza and Tom\\'{e}, Ana Maria and Lopes, Lu\\'{\\i}s Seabra},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Object Representation for Open-Ended Object Category Learning and Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/299a23a2291e2126b91d54f3601ec162-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/299a23a2291e2126b91d54f3601ec162-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/299a23a2291e2126b91d54f3601ec162-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/299a23a2291e2126b91d54f3601ec162-Reviews.html", "metareview": "", "pdf_size": 1220375, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=544008457472828396&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "IEETA - Instituto de Engenharia Electr\u00f3nica e Telem\u00e1tica de Aveiro, University of Aveiro, Aveiro, 3810-193, Portugal; IEETA - Instituto de Engenharia Electr\u00f3nica e Telem\u00e1tica de Aveiro, University of Aveiro, Aveiro, 3810-193, Portugal; IEETA - Instituto de Engenharia Electr\u00f3nica e Telem\u00e1tica de Aveiro, University of Aveiro, Aveiro, 3810-193, Portugal", "aff_domain": "ua.pt;ua.pt;ua.pt", "email": "ua.pt;ua.pt;ua.pt", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/299a23a2291e2126b91d54f3601ec162-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Aveiro", "aff_unique_dep": "Instituto de Engenharia Electr\u00f3nica e Telem\u00e1tica de Aveiro", "aff_unique_url": "https://www.ua.pt", "aff_unique_abbr": "UA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Aveiro", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Portugal" }, { "title": "Hierarchical Question-Image Co-Attention for Visual Question Answering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7040", "id": "7040", "author_site": "Jiasen Lu, Jianwei Yang, Dhruv Batra, Devi Parikh", "author": "Jiasen Lu; Jianwei Yang; Dhruv Batra; Devi Parikh", "abstract": "A number of recent works have proposed attention models for Visual Question Answering (VQA) that generate spatial maps highlighting image regions relevant to answering the question. In this paper, we argue that in addition to modeling \"where to look\" or visual attention, it is equally important to model \"what words to listen to\" or question attention. We present a novel co-attention model for VQA that jointly reasons about image and question attention. In addition, our model reasons about the question (and consequently the image via the co-attention mechanism) in a hierarchical fashion via novel 1-dimensional convolutional neural networks (CNNs). Our model improves the state-of-the-art on the VQA dataset from 60.3% to 60.5%, and from 61.6% to 63.3% on the COCO-QA dataset. 
By using ResNet, the performance is further improved to 62.1% for VQA and 65.4% for COCO-QA.", "bibtex": "@inproceedings{NIPS2016_9dcb88e0,\n author = {Lu, Jiasen and Yang, Jianwei and Batra, Dhruv and Parikh, Devi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hierarchical Question-Image Co-Attention for Visual Question Answering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9dcb88e0137649590b755372b040afad-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9dcb88e0137649590b755372b040afad-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9dcb88e0137649590b755372b040afad-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9dcb88e0137649590b755372b040afad-Reviews.html", "metareview": "", "pdf_size": 979590, "gs_citation": 2128, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15146345852176060026&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Virginia Tech; Virginia Tech; Virginia Tech + Georgia Institute of Technology; Virginia Tech + Georgia Institute of Technology", "aff_domain": "vt.edu;vt.edu;vt.edu;vt.edu", "email": "vt.edu;vt.edu;vt.edu;vt.edu", "github": "https://github.com/jiasenlu/HieCoAttenVQA", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9dcb88e0137649590b755372b040afad-Abstract.html", "aff_unique_index": "0;0;0+1;0+1", "aff_unique_norm": "Virginia Tech;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.vt.edu;https://www.gatech.edu", "aff_unique_abbr": "VT;Georgia Tech", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+0;0+0", "aff_country_unique": "United States" }, { "title": "High Dimensional Structured Superposition Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7322", "id": "7322", "author_site": "Qilong Gu, Arindam Banerjee", "author": "Qilong Gu; Arindam Banerjee", "abstract": "High dimensional superposition models characterize observations using parameters which can be written as a sum of multiple component parameters, each with its own structure, e.g., sum of low rank and sparse matrices. In this paper, we consider general superposition models which allow sum of any number of component parameters, and each component structure can be characterized by any norm. We present a simple estimator for such models, give a geometric condition under which the components can be accurately estimated, characterize sample complexity of the estimator, and give non-asymptotic bounds on the componentwise estimation error. We use tools from empirical processes and generic chaining for the statistical analysis, and our results, which substantially generalize prior work on superposition models, are in terms of Gaussian widths of suitable spherical caps.", "bibtex": "@inproceedings{NIPS2016_d757719e,\n author = {Gu, Qilong and Banerjee, Arindam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
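A minimal special case of a superposition model, for intuition: observe Y = L* + S* + noise with L* low-rank and S* sparse, and estimate both components by alternating proximal gradient steps (singular-value thresholding and soft-thresholding). The paper's estimator and geometric analysis cover far more general norm-structured sums; the regularization levels below are ad hoc.

```python
import numpy as np
rng = np.random.default_rng(0)

def svt(M, tau):                      # prox of tau * nuclear norm
    U, s, Vt = np.linalg.svd(M, full_matrices=False)
    return U @ np.diag(np.maximum(s - tau, 0)) @ Vt

def soft(M, tau):                     # prox of tau * l1 norm
    return np.sign(M) * np.maximum(np.abs(M) - tau, 0)

n = 50
L_true = rng.normal(size=(n, 2)) @ rng.normal(size=(2, n))        # rank 2
S_true = (rng.uniform(size=(n, n)) < 0.05) * rng.normal(scale=5, size=(n, n))
Y = L_true + S_true + 0.1 * rng.normal(size=(n, n))

L, S = np.zeros((n, n)), np.zeros((n, n))
for _ in range(200):
    R = Y - L - S                     # negative gradient of 0.5*||Y - L - S||^2
    L = svt(L + 0.5 * R, 0.5)
    S = soft(S + 0.5 * R, 0.05)
print(np.linalg.norm(L - L_true) / np.linalg.norm(L_true))        # small
```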
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {High Dimensional Structured Superposition Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d757719ed7c2b66dd17dcee2a3cb29f4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d757719ed7c2b66dd17dcee2a3cb29f4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d757719ed7c2b66dd17dcee2a3cb29f4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d757719ed7c2b66dd17dcee2a3cb29f4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d757719ed7c2b66dd17dcee2a3cb29f4-Reviews.html", "metareview": "", "pdf_size": 367213, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9692006201200082264&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Dept of Computer Science & Engineering, University of Minnesota, Twin Cities; Dept of Computer Science & Engineering, University of Minnesota, Twin Cities", "aff_domain": "cs.umn.edu;cs.umn.edu", "email": "cs.umn.edu;cs.umn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d757719ed7c2b66dd17dcee2a3cb29f4-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "Department of Computer Science & Engineering", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Twin Cities", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "High resolution neural connectivity from incomplete tracing data using nonnegative spline regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7370", "id": "7370", "author_site": "Kameron Harris, Stefan Mihalas, Eric Shea-Brown", "author": "Kameron D Harris; Stefan Mihalas; Eric Shea-Brown", "abstract": "Whole-brain neural connectivity data are now available from viral tracing experiments, which reveal the connections between a source injection site and elsewhere in the brain. These hold the promise of revealing spatial patterns of connectivity throughout the mammalian brain. To achieve this goal, we seek to fit a weighted, nonnegative adjacency matrix among 100 \u03bcm brain \u201cvoxels\u201d using viral tracer data. Despite a multi-year experimental effort, injections provide incomplete coverage, and the number of voxels in our data is orders of magnitude larger than the number of injections, making the problem severely underdetermined. Furthermore, projection data are missing within the injection site because local connections there are not separable from the injection signal. We use a novel machine-learning algorithm to meet these challenges and develop a spatially explicit, voxel-scale connectivity map of the mouse visual system. Our method combines three features: a matrix completion loss for missing data, a smoothing spline penalty to regularize the problem, and (optionally) a low rank factorization. We demonstrate the consistency of our estimator using synthetic data and then apply it to newly available Allen Mouse Brain Connectivity Atlas data for the visual system. Our algorithm is significantly more predictive than current state of the art approaches which assume regions to be homogeneous. 
We demonstrate the efficacy of a low rank version on visual cortex data and discuss the possibility of extending this to a whole-brain connectivity matrix at the voxel scale.", "bibtex": "@inproceedings{NIPS2016_f337d999,\n author = {Harris, Kameron D and Mihalas, Stefan and Shea-Brown, Eric},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {High resolution neural connectivity from incomplete tracing data using nonnegative spline regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f337d999d9ad116a7b4f3d409fcc6480-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f337d999d9ad116a7b4f3d409fcc6480-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f337d999d9ad116a7b4f3d409fcc6480-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f337d999d9ad116a7b4f3d409fcc6480-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f337d999d9ad116a7b4f3d409fcc6480-Reviews.html", "metareview": "", "pdf_size": 1842550, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2208590985312030900&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Applied Mathematics, U. of Washington; Allen Institute for Brain Science + Applied Mathematics, U. of Washington; Applied Mathematics, U. of Washington + Allen Institute for Brain Science", "aff_domain": "uw.edu;alleninstitute.org;uw.edu", "email": "uw.edu;alleninstitute.org;uw.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f337d999d9ad116a7b4f3d409fcc6480-Abstract.html", "aff_unique_index": "0;1+0;0+1", "aff_unique_norm": "University of Washington;Allen Institute for Brain Science", "aff_unique_dep": "Department of Applied Mathematics;", "aff_unique_url": "https://www.washington.edu;https://www.alleninstitute.org", "aff_unique_abbr": "UW;Allen Institute", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0+0", "aff_country_unique": "United States" }, { "title": "High-Rank Matrix Completion and Clustering under Self-Expressive Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6963", "id": "6963", "author": "Ehsan Elhamifar", "abstract": "We propose efficient algorithms for simultaneous clustering and completion of incomplete high-dimensional data that lie in a union of low-dimensional subspaces. We cast the problem as finding a completion of the data matrix so that each point can be reconstructed as a linear or affine combination of a few data points. Since the problem is NP-hard, we propose a lifting framework and reformulate the problem as a group-sparse recovery of each incomplete data point in a dictionary built using incomplete data, subject to rank-one constraints. To solve the problem efficiently, we propose a rank pursuit algorithm and a convex relaxation. The solutions of our algorithms recover the missing entries and provide a similarity matrix for clustering. Our algorithms can deal with both low-rank and high-rank matrices, do not suffer from initialization issues, do not need to know the dimensions of the subspaces, and can work with a small number of data points. 
By extensive experiments on synthetic data and real problems of video motion segmentation and completion of motion capture data, we show that when the data matrix is low-rank, our algorithm performs on par with or better than low-rank matrix completion methods, while for high-rank data matrices, our method significantly outperforms existing algorithms.", "bibtex": "@inproceedings{NIPS2016_9f61408e,\n author = {Elhamifar, Ehsan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {High-Rank Matrix Completion and Clustering under Self-Expressive Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9f61408e3afb633e50cdf1b20de6f466-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9f61408e3afb633e50cdf1b20de6f466-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9f61408e3afb633e50cdf1b20de6f466-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9f61408e3afb633e50cdf1b20de6f466-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9f61408e3afb633e50cdf1b20de6f466-Reviews.html", "metareview": "", "pdf_size": 636265, "gs_citation": 65, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2697347574559059873&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": "College of Computer and Information Science, Northeastern University", "aff_domain": "ccs.neu.edu", "email": "ccs.neu.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9f61408e3afb633e50cdf1b20de6f466-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "College of Computer and Information Science", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Higher-Order Factorization Machines", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8505", "id": "8505", "author_site": "Mathieu Blondel, Akinori Fujino, Naonori Ueda, Masakazu Ishihata", "author": "Mathieu Blondel; Akinori Fujino; Naonori Ueda; Masakazu Ishihata", "abstract": "Factorization machines (FMs) are a supervised learning approach that can use second-order feature combinations even when the data is very high-dimensional. Unfortunately, despite increasing interest in FMs, there exists to date no efficient training algorithm for higher-order FMs (HOFMs). In this paper, we present the first generic yet efficient algorithms for training arbitrary-order HOFMs. We also present new variants of HOFMs with shared parameters, which greatly reduce model size and prediction times while maintaining similar accuracy. We demonstrate the proposed approaches on four different link prediction tasks.", "bibtex": "@inproceedings{NIPS2016_158fc2dd,\n author = {Blondel, Mathieu and Fujino, Akinori and Ueda, Naonori and Ishihata, Masakazu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
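The higher-order feature combinations used by HOFMs can be evaluated without enumerating them, via the ANOVA-kernel dynamic program sketched below; hofm_predict then sums kernel evaluations over the factor rows for orders 2 and 3. Dimensions and weights are random placeholders, and training (the paper's contribution) is not shown.

```python
import numpy as np

def anova_kernel(p, x, degree):
    # A^m(p, x) = sum over j1 < ... < jm of prod_t p[j_t] * x[j_t],
    # computed by the standard dynamic program in O(degree * d) time.
    d = len(x)
    a = np.zeros((degree + 1, d + 1))
    a[0, :] = 1.0
    for t in range(1, degree + 1):
        for j in range(t, d + 1):          # need at least t coordinates
            a[t, j] = a[t, j - 1] + p[j - 1] * x[j - 1] * a[t - 1, j - 1]
    return a[degree, d]

def hofm_predict(x, w, P):
    # P[t] holds k rows of factor weights for the order-(t+2) interactions.
    y = w @ x
    for t, Pt in enumerate(P):
        y += sum(anova_kernel(ps, x, degree=t + 2) for ps in Pt)
    return y

rng = np.random.default_rng(0)
d, k = 8, 3
x, w = rng.normal(size=d), rng.normal(size=d)
P = [rng.normal(size=(k, d)), rng.normal(size=(k, d))]   # orders 2 and 3
print(hofm_predict(x, w, P))
```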
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Higher-Order Factorization Machines},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/158fc2ddd52ec2cf54d3c161f2dd6517-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/158fc2ddd52ec2cf54d3c161f2dd6517-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/158fc2ddd52ec2cf54d3c161f2dd6517-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/158fc2ddd52ec2cf54d3c161f2dd6517-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/158fc2ddd52ec2cf54d3c161f2dd6517-Reviews.html", "metareview": "", "pdf_size": 517025, "gs_citation": 270, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5914000800490244894&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/158fc2ddd52ec2cf54d3c161f2dd6517-Abstract.html" }, { "title": "Homotopy Smoothing for Non-Smooth Problems with Lower Complexity than $O(1/\\epsilon)$", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7088", "id": "7088", "author_site": "Yi Xu, Yan Yan, Qihang Lin, Tianbao Yang", "author": "Yi Xu; Yan Yan; Qihang Lin; Tianbao Yang", "abstract": "In this paper, we develop a novel {\\bf ho}moto{\\bf p}y {\\bf s}moothing (HOPS) algorithm for solving a family of non-smooth problems that is composed of a non-smooth term with an explicit max-structure and a smooth term or a simple non-smooth term whose proximal mapping is easy to compute. The best known iteration complexity for solving such non-smooth optimization problems is $O(1/\\epsilon)$ without any strong-convexity assumption. In this work, we show that the proposed HOPS achieves a lower iteration complexity of $\\tilde O(1/\\epsilon^{1-\\theta})$ with $\\theta\\in(0,1]$ capturing the local sharpness of the objective function around the optimal solutions. To the best of our knowledge, this is the lowest iteration complexity achieved so far for the considered non-smooth optimization problems without a strong-convexity assumption. The HOPS algorithm employs Nesterov's smoothing technique and Nesterov's accelerated gradient method and runs in stages, which gradually decreases the smoothing parameter in a stage-wise manner until it yields a sufficiently good approximation of the original function. We show that HOPS enjoys linear convergence for many well-known non-smooth problems (e.g., empirical risk minimization with a piece-wise linear loss function and $\\ell_1$ norm regularizer, finding a point in a polyhedron, cone programming, etc). Experimental results verify the effectiveness of HOPS in comparison with Nesterov's smoothing algorithm and the primal-dual style of first-order methods.", "bibtex": "@inproceedings{NIPS2016_b5dc4e5d,\n author = {Xu, Yi and Yan, Yan and Lin, Qihang and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
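A minimal homotopy-smoothing loop in the spirit of HOPS, for min_x ||Ax - b||_1: smooth the absolute value with a Huber function of parameter mu, optimize to rough accuracy, halve mu, and warm-start the next stage. For brevity plain gradient steps stand in for the accelerated method used by HOPS; the stage counts, step sizes, and problem data are illustrative.

```python
import numpy as np
rng = np.random.default_rng(0)

def huber_grad(r, mu):
    return np.clip(r / mu, -1.0, 1.0)       # derivative of the smoothed |.|

A = rng.normal(size=(200, 50))
x_true = np.zeros(50)
x_true[:5] = rng.normal(size=5)
b = A @ x_true

x, mu = np.zeros(50), 1.0
L0 = np.linalg.norm(A, 2) ** 2
for stage in range(8):                       # mu: 1, 1/2, ..., 1/128
    step = mu / L0                           # 1/L for the mu-smoothed objective
    for _ in range(500):
        x -= step * A.T @ huber_grad(A @ x - b, mu)
    mu /= 2                                  # tighten the approximation
print(np.abs(A @ x - b).sum())               # l1 residual of the final iterate
```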
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Homotopy Smoothing for Non-Smooth Problems with Lower Complexity than O(1/\\textbackslash epsilon)},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b5dc4e5d9b495d0196f61d45b26ef33e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b5dc4e5d9b495d0196f61d45b26ef33e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b5dc4e5d9b495d0196f61d45b26ef33e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b5dc4e5d9b495d0196f61d45b26ef33e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b5dc4e5d9b495d0196f61d45b26ef33e-Reviews.html", "metareview": "", "pdf_size": 367357, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15051546381011483105&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b5dc4e5d9b495d0196f61d45b26ef33e-Abstract.html" }, { "title": "How Deep is the Feature Analysis underlying Rapid Visual Categorization?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7093", "id": "7093", "author_site": "Sven Eberhardt, Jonah G Cader, Thomas Serre", "author": "Sven Eberhardt; Jonah G Cader; Thomas Serre", "abstract": "Rapid categorization paradigms have a long history in experimental psychology: Characterized by short presentation times and speeded behavioral responses, these tasks highlight the efficiency with which our visual system processes natural object categories. Previous studies have shown that feed-forward hierarchical models of the visual cortex provide a good fit to human visual decisions. At the same time, recent work in computer vision has demonstrated significant gains in object recognition accuracy with increasingly deep hierarchical architectures. But it is unclear how well these models account for human visual decisions and what they may reveal about the underlying brain processes. We have conducted a large-scale psychophysics study to assess the correlation between computational models and human behavioral responses on a rapid animal vs. non-animal categorization task. We considered visual representations of varying complexity by analyzing the output of different stages of processing in three state-of-the-art deep networks. We found that recognition accuracy increases with higher stages of visual processing (higher level stages indeed outperforming human participants on the same task) but that human decisions agree best with predictions from intermediate stages. Overall, these results suggest that human participants may rely on visual features of intermediate complexity and that the complexity of visual representations afforded by modern deep network models may exceed the complexity of those used by human participants during rapid categorization.", "bibtex": "@inproceedings{NIPS2016_42e77b63,\n author = {Eberhardt, Sven and Cader, Jonah G and Serre, Thomas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {How Deep is the Feature Analysis underlying Rapid Visual Categorization?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/42e77b63637ab381e8be5f8318cc28a2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/42e77b63637ab381e8be5f8318cc28a2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/42e77b63637ab381e8be5f8318cc28a2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/42e77b63637ab381e8be5f8318cc28a2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/42e77b63637ab381e8be5f8318cc28a2-Reviews.html", "metareview": "", "pdf_size": 2466862, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6466640642605525659&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Cognitive, Linguistic & Psychological Sciences+Brown Institute for Brain Sciences+Brown University; Department of Cognitive, Linguistic & Psychological Sciences+Brown Institute for Brain Sciences+Brown University; Department of Cognitive, Linguistic & Psychological Sciences+Brown Institute for Brain Sciences+Brown University", "aff_domain": "brown.edu;brown.edu;brown.edu", "email": "brown.edu;brown.edu;brown.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/42e77b63637ab381e8be5f8318cc28a2-Abstract.html", "aff_unique_index": "0+1+1;0+1+1;0+1+1", "aff_unique_norm": "University Affiliation Not Specified;Brown University", "aff_unique_dep": "Department of Cognitive, Linguistic & Psychological Sciences;Institute for Brain Sciences", "aff_unique_url": ";https://www.brown.edu", "aff_unique_abbr": ";Brown", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "1+1;1+1;1+1", "aff_country_unique": ";United States" }, { "title": "Human Decision-Making under Limited Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7158", "id": "7158", "author_site": "Pedro Ortega, Alan A Stocker", "author": "Pedro A Ortega; Alan Stocker", "abstract": "Subjective expected utility theory assumes that decision-makers possess unlimited computational resources to reason about their choices; however, virtually all decisions in everyday life are made under resource constraints---i.e. decision-makers are bounded in their rationality. Here we experimentally tested the predictions made by a formalization of bounded rationality based on ideas from statistical mechanics and information theory. We systematically tested human subjects in their ability to solve combinatorial puzzles under different time limitations. We found that our bounded-rational model accounts well for the data. The decomposition of the fitted model parameter into the subjects' expected utility function and resource parameter provides interesting insight into the subjects' information capacity limits. Our results confirm that humans gradually fall back on their learned prior choice patterns when confronted with increasing resource limitations.", "bibtex": "@inproceedings{NIPS2016_fc490ca4,\n author = {Ortega, Pedro A and Stocker, Alan A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Human Decision-Making under Limited Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fc490ca45c00b1249bbe3554a4fdf6fb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fc490ca45c00b1249bbe3554a4fdf6fb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fc490ca45c00b1249bbe3554a4fdf6fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fc490ca45c00b1249bbe3554a4fdf6fb-Reviews.html", "metareview": "", "pdf_size": 750496, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17219180096743925043&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Psychology, University of Pennsylvania; Department of Psychology, University of Pennsylvania", "aff_domain": "seas.upenn.edu;sas.upenn.edu", "email": "seas.upenn.edu;sas.upenn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fc490ca45c00b1249bbe3554a4fdf6fb-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "Department of Psychology", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Hypothesis Testing in Unsupervised Domain Adaptation with Applications in Alzheimer's Disease", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7055", "id": "7055", "author_site": "Hao Zhou, Vamsi Ithapu, Sathya Narayanan Ravi, Vikas Singh, Grace Wahba, Sterling C Johnson", "author": "Hao Zhou; Vamsi K Ithapu; Sathya Narayanan Ravi; Vikas Singh; Grace Wahba; Sterling C Johnson", "abstract": "Consider samples from two different data sources $\\{\\mathbf{x_s^i}\\} \\sim P_{\\rm source}$ and $\\{\\mathbf{x_t^i}\\} \\sim P_{\\rm target}$. We only observe their transformed versions $h(\\mathbf{x_s^i})$ and $g(\\mathbf{x_t^i})$, for some known function classes $h(\\cdot)$ and $g(\\cdot)$. Our goal is to perform a statistical test checking if $P_{\\rm source}$ = $P_{\\rm target}$ while removing the distortions induced by the transformations. This problem is closely related to concepts underlying numerous domain adaptation algorithms, and in our case, is motivated by the need to combine clinical and imaging-based biomarkers from multiple sites and/or batches, where this problem is fairly common and an impediment to the conduct of analyses with much larger sample sizes. We develop a framework that addresses this problem using ideas from hypothesis testing on the transformed measurements, wherein the distortions need to be estimated {\\it in tandem} with the testing. We derive a simple algorithm and study its convergence and consistency properties in detail, and we also provide lower-bound strategies based on recent work in continuous optimization. 
On a dataset of individuals at risk for neurological disease, our results are competitive with alternative procedures that are twice as expensive and in some cases operationally infeasible to implement.", "bibtex": "@inproceedings{NIPS2016_996009f2,\n author = {Zhou, Hao and Ithapu, Vamsi K and Ravi, Sathya Narayanan and Singh, Vikas and Wahba, Grace and Johnson, Sterling C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Hypothesis Testing in Unsupervised Domain Adaptation with Applications in Alzheimer\\textquotesingle s Disease},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/996009f2374006606f4c0b0fda878af1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/996009f2374006606f4c0b0fda878af1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/996009f2374006606f4c0b0fda878af1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/996009f2374006606f4c0b0fda878af1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/996009f2374006606f4c0b0fda878af1-Reviews.html", "metareview": "", "pdf_size": 404138, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5164487326599628587&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/996009f2374006606f4c0b0fda878af1-Abstract.html" }, { "title": "Identification and Overidentification of Linear Structural Equation Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7116", "id": "7116", "author": "Bryant Chen", "abstract": "In this paper, we address the problems of identifying linear structural equation models and discovering the constraints they imply. We first extend the half-trek criterion to cover a broader class of models and apply our extension to finding testable constraints implied by the model. We then show that any semi-Markovian linear model can be recursively decomposed into simpler sub-models, resulting in improved identification and constraint discovery power. Finally, we show that, unlike the existing methods developed for linear models, the resulting method subsumes the identification and constraint discovery algorithms for non-parametric models.", "bibtex": "@inproceedings{NIPS2016_49c9adb1,\n author = {Chen, Bryant},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Identification and Overidentification of Linear Structural Equation Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/49c9adb18e44be0711a94e827042f630-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/49c9adb18e44be0711a94e827042f630-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/49c9adb18e44be0711a94e827042f630-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/49c9adb18e44be0711a94e827042f630-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/49c9adb18e44be0711a94e827042f630-Reviews.html", "metareview": "", "pdf_size": 413244, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12169744241584168112&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/49c9adb18e44be0711a94e827042f630-Abstract.html" }, { "title": "Image Restoration Using Very Deep Convolutional Encoder-Decoder Networks with Symmetric Skip Connections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6952", "id": "6952", "author_site": "Xiaojiao Mao, Chunhua Shen, Yu-Bin Yang", "author": "Xiaojiao Mao; Chunhua Shen; Yu-Bin Yang", "abstract": "In this paper, we propose a very deep fully convolutional encoding-decoding framework for image restoration such as denoising and super-resolution. The network is composed of multiple layers of convolution and deconvolution operators, learning end-to-end mappings from corrupted images to the original ones. The convolutional layers act as feature extractors, which capture the abstraction of image contents while eliminating noise/corruption. Deconvolutional layers are then used to recover the image details. We propose to symmetrically link convolutional and deconvolutional layers with skip-layer connections, with which the training converges much faster and attains a higher-quality local optimum. First, the skip connections allow the signal to be back-propagated to bottom layers directly, and thus tackle the problem of vanishing gradients, making training deep networks easier and consequently achieving restoration performance gains. Second, these skip connections pass image details from convolutional layers to deconvolutional layers, which is beneficial in recovering the original image. Significantly, with the large capacity, we can handle different levels of noise using a single model. Experimental results show that our network achieves better performance than recent state-of-the-art methods.", "bibtex": "@inproceedings{NIPS2016_0ed94223,\n author = {Mao, Xiaojiao and Shen, Chunhua and Yang, Yu-Bin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Image Restoration Using Very Deep Convolutional Encoder-Decoder Networks with Symmetric Skip Connections},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0ed9422357395a0d4879191c66f4faa2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0ed9422357395a0d4879191c66f4faa2-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0ed9422357395a0d4879191c66f4faa2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0ed9422357395a0d4879191c66f4faa2-Reviews.html", "metareview": "", "pdf_size": 1791953, "gs_citation": 2071, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9889699634650873051&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0ed9422357395a0d4879191c66f4faa2-Abstract.html" }, { "title": "Improved Deep Metric Learning with Multi-class N-pair Loss Objective", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7259", "id": "7259", "author": "Kihyuk Sohn", "abstract": "Deep metric learning has gained much popularity in recent years, following the success of deep learning. However, existing frameworks of deep metric learning based on contrastive loss and triplet loss often suffer from slow convergence, partially because they employ only one negative example while not interacting with the other negative classes in each update. In this paper, we propose to address this problem with a new metric learning objective called multi-class N-pair loss. The proposed objective function firstly generalizes triplet loss by allowing joint comparison among more than one negative example \u2013 more specifically, N-1 negative examples \u2013 and secondly reduces the computational burden of evaluating deep embedding vectors via an efficient batch construction strategy using only N pairs of examples, instead of (N+1)\u00d7N. We demonstrate the superiority of our proposed loss to the triplet loss as well as other competing loss functions for a variety of tasks on several visual recognition benchmarks, including fine-grained object recognition and verification, image clustering and retrieval, and face verification and identification.", "bibtex": "@inproceedings{NIPS2016_6b180037,\n author = {Sohn, Kihyuk},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Deep Metric Learning with Multi-class N-pair Loss Objective},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6b180037abbebea991d8b1232f8a8ca9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6b180037abbebea991d8b1232f8a8ca9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6b180037abbebea991d8b1232f8a8ca9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6b180037abbebea991d8b1232f8a8ca9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6b180037abbebea991d8b1232f8a8ca9-Reviews.html", "metareview": "", "pdf_size": 414955, "gs_citation": 2813, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12886354739576660748&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "NEC Laboratories America, Inc.", "aff_domain": "nec-labs.com", "email": "nec-labs.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6b180037abbebea991d8b1232f8a8ca9-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "NEC Laboratories America", "aff_unique_dep": "", "aff_unique_url": "https://www.nec-labs.com", "aff_unique_abbr": "NEC Labs America", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Improved Dropout for Shallow and Deep Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8492", "id": "8492", "author_site": "Zhe Li, Boqing Gong, Tianbao Yang", "author": "Zhe Li; Boqing Gong; Tianbao Yang", "abstract": "Dropout has seen great success in training deep neural networks by independently zeroing out the outputs of neurons at random. It has also received a surge of interest for shallow learning, e.g., logistic regression. However, the independent sampling for dropout could be suboptimal in terms of convergence. In this paper, we propose to use multinomial sampling for dropout, i.e., sampling features or neurons according to a multinomial distribution with different probabilities for different features/neurons. To exhibit the optimal dropout probabilities, we analyze shallow learning with multinomial dropout and establish the risk bound for stochastic optimization. By minimizing a sampling dependent factor in the risk bound, we obtain a distribution-dependent dropout with sampling probabilities dependent on the second order statistics of the data distribution. To tackle the issue of evolving distribution of neurons in deep learning, we propose an efficient adaptive dropout (named \\textbf{evolutional dropout}) that computes the sampling probabilities on-the-fly from a mini-batch of examples. Empirical studies on several benchmark datasets demonstrate that the proposed dropouts achieve not only much faster convergence but also a smaller testing error than the standard dropout. For example, on the CIFAR-100 data, the evolutional dropout achieves relative improvements of over 10\\% in prediction performance and over 50\\% in convergence speed compared to the standard dropout.", "bibtex": "@inproceedings{NIPS2016_7bb06076,\n author = {Li, Zhe and Gong, Boqing and Yang, Tianbao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Dropout for Shallow and Deep Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7bb060764a818184ebb1cc0d43d382aa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7bb060764a818184ebb1cc0d43d382aa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7bb060764a818184ebb1cc0d43d382aa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7bb060764a818184ebb1cc0d43d382aa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7bb060764a818184ebb1cc0d43d382aa-Reviews.html", "metareview": "", "pdf_size": 569144, "gs_citation": 116, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12197551523471076162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "The University of Iowa, Iowa City, IA 52245; University of Central Florida, Orlando, FL 32816; The University of Iowa, Iowa City, IA 52245", "aff_domain": "uiowa.edu;crcv.ucf.edu;uiowa.edu", "email": "uiowa.edu;crcv.ucf.edu;uiowa.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7bb060764a818184ebb1cc0d43d382aa-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Iowa;University of Central Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.uiowa.edu;https://www.ucf.edu", "aff_unique_abbr": "UIowa;UCF", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Iowa City;Orlando", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Improved Error Bounds for Tree Representations of Metric Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8518", "id": "8518", "author_site": "Samir Chowdhury, Facundo M\u00e9moli, Zane T Smith", "author": "Samir Chowdhury; Facundo M\u00e9moli; Zane T Smith", "abstract": "Estimating optimal phylogenetic trees or hierarchical clustering trees from metric data is an important problem in evolutionary biology and data analysis. Intuitively, the goodness-of-fit of a metric space to a tree depends on its inherent treeness, as well as other metric properties such as intrinsic dimension. Existing algorithms for embedding metric spaces into tree metrics provide distortion bounds depending on cardinality. Because cardinality is a simple property of any set, we argue that such bounds do not fully capture the rich structure endowed by the metric. We consider an embedding of a metric space into a tree proposed by Gromov. By proving a stability result, we obtain an improved additive distortion bound depending only on the hyperbolicity and doubling dimension of the metric. We observe that Gromov's method is dual to the well-known single linkage hierarchical clustering (SLHC) method. By means of this duality, we are able to transport our results to the setting of SLHC, where such additive distortion bounds were previously unknown.", "bibtex": "@inproceedings{NIPS2016_b069b341,\n author = {Chowdhury, Samir and M\\'{e}moli, Facundo and Smith, Zane T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Error Bounds for Tree Representations of Metric Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b069b3415151fa7217e870017374de7c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b069b3415151fa7217e870017374de7c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b069b3415151fa7217e870017374de7c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b069b3415151fa7217e870017374de7c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b069b3415151fa7217e870017374de7c-Reviews.html", "metareview": "", "pdf_size": 257291, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7416060273168748892&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Mathematics+Department of Computer Science and Engineering, The Ohio State University; Department of Mathematics+Department of Computer Science and Engineering, The Ohio State University; Department of Computer Science and Engineering, The Ohio State University", "aff_domain": "osu.edu;math.osu.edu;osu.edu", "email": "osu.edu;math.osu.edu;osu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b069b3415151fa7217e870017374de7c-Abstract.html", "aff_unique_index": "0+1;0+1;1", "aff_unique_norm": "Mathematics Department;Ohio State University", "aff_unique_dep": "Department of Mathematics;Department of Computer Science and Engineering", "aff_unique_url": ";https://www.osu.edu", "aff_unique_abbr": ";OSU", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";United States" }, { "title": "Improved Regret Bounds for Oracle-Based Adversarial Contextual Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7076", "id": "7076", "author_site": "Vasilis Syrgkanis, Haipeng Luo, Akshay Krishnamurthy, Robert Schapire", "author": "Vasilis Syrgkanis; Haipeng Luo; Akshay Krishnamurthy; Robert E. Schapire", "abstract": "We propose a new oracle-based algorithm, BISTRO+, for the adversarial contextual bandit problem, where either contexts are drawn i.i.d. or the sequence of contexts is known a priori, but where the losses are picked adversarially. Our algorithm is computationally efficient, assuming access to an offline optimization oracle, and enjoys a regret of order $O((KT)^{\\frac{2}{3}}(\\log N)^{\\frac{1}{3}})$, where $K$ is the number of actions, $T$ is the number of iterations, and $N$ is the number of baseline policies. Our result is the first to break the $O(T^{\\frac{3}{4}})$ barrier achieved by recent algorithms, which was left as a major open problem. Our analysis employs the recent relaxation framework of (Rakhlin and Sridharan, ICML'16).", "bibtex": "@inproceedings{NIPS2016_dfa92d8f,\n author = {Syrgkanis, Vasilis and Luo, Haipeng and Krishnamurthy, Akshay and Schapire, Robert E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Regret Bounds for Oracle-Based Adversarial Contextual Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dfa92d8f817e5b08fcaafb50d03763cf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dfa92d8f817e5b08fcaafb50d03763cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dfa92d8f817e5b08fcaafb50d03763cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dfa92d8f817e5b08fcaafb50d03763cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dfa92d8f817e5b08fcaafb50d03763cf-Reviews.html", "metareview": "", "pdf_size": 230106, "gs_citation": 50, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15002656164448058756&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Microsoft Research; Microsoft Research; University of Massachusetts, Amherst; Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;cs.umass.edu;microsoft.com", "email": "microsoft.com;microsoft.com;cs.umass.edu;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dfa92d8f817e5b08fcaafb50d03763cf-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Microsoft;University of Massachusetts Amherst", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.umass.edu", "aff_unique_abbr": "MSR;UMass Amherst", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Improved Techniques for Training GANs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7026", "id": "7026", "author_site": "Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, Peter Chen, Xi Chen", "author": "Tim Salimans; Ian Goodfellow; Wojciech Zaremba; Vicki Cheung; Alec Radford; Xi Chen; Xi Chen", "abstract": "We present a variety of new architectural features and training procedures that we apply to the generative adversarial networks (GANs) framework. Using our new techniques, we achieve state-of-the-art results in semi-supervised classification on MNIST, CIFAR-10 and SVHN. The generated images are of high quality as confirmed by a visual Turing test: Our model generates MNIST samples that humans cannot distinguish from real data, and CIFAR-10 samples that yield a human error rate of 21.3%. We also present ImageNet samples with unprecedented resolution and show that our methods enable the model to learn recognizable features of ImageNet classes.", "bibtex": "@inproceedings{NIPS2016_8a3363ab,\n author = {Salimans, Tim and Goodfellow, Ian and Zaremba, Wojciech and Cheung, Vicki and Radford, Alec and Chen, Xi and Chen, Xi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Techniques for Training GANs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8a3363abe792db2d8761d6403605aeb7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8a3363abe792db2d8761d6403605aeb7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8a3363abe792db2d8761d6403605aeb7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8a3363abe792db2d8761d6403605aeb7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8a3363abe792db2d8761d6403605aeb7-Reviews.html", "metareview": "", "pdf_size": 2639054, "gs_citation": 12136, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2151481962498772342&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "https://github.com/openai/improved-gan", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8a3363abe792db2d8761d6403605aeb7-Abstract.html" }, { "title": "Improved Variational Inference with Inverse Autoregressive Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6943", "id": "6943", "author_site": "Diederik Kingma, Tim Salimans, Rafal Jozefowicz, Peter Chen, Xi Chen, Ilya Sutskever, Max Welling", "author": "Diederik P. Kingma; Tim Salimans; Rafal Jozefowicz; Xi Chen; Ilya Sutskever; Max Welling", "abstract": "The framework of normalizing flows provides a general strategy for flexible variational inference of posteriors over latent variables. We propose a new type of normalizing flow, inverse autoregressive flow (IAF), that, in contrast to earlier published flows, scales well to high-dimensional latent spaces. The proposed flow consists of a chain of invertible transformations, where each transformation is based on an autoregressive neural network. In experiments, we show that IAF significantly improves upon diagonal Gaussian approximate posteriors. In addition, we demonstrate that a novel type of variational autoencoder, coupled with IAF, is competitive with neural autoregressive models in terms of attained log-likelihood on natural images, while allowing significantly faster synthesis.", "bibtex": "@inproceedings{NIPS2016_ddeebdee,\n author = {Kingma, Durk P and Salimans, Tim and Jozefowicz, Rafal and Chen, Xi and Sutskever, Ilya and Welling, Max},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improved Variational Inference with Inverse Autoregressive Flow},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Reviews.html", "metareview": "", "pdf_size": 988409, "gs_citation": 2328, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9037312380498030932&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "OpenAI; OpenAI; OpenAI; OpenAI; OpenAI; University of Amsterdam + University of California Irvine + Canadian Institute for Advanced Research (CIFAR)", "aff_domain": "openai.com;openai.com;openai.com;openai.com;openai.com;uva.nl", "email": "openai.com;openai.com;openai.com;openai.com;openai.com;uva.nl", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ddeebdeefdb7e7e7a697e1c3e3d8ef54-Abstract.html", "aff_unique_index": "0;0;0;0;0;1+2+3", "aff_unique_norm": "OpenAI;University of Amsterdam;University of California, Irvine;Canadian Institute for Advanced Research", "aff_unique_dep": ";;;", "aff_unique_url": "https://openai.com;https://www.uva.nl;https://www.uci.edu;https://www.cifar.ca", "aff_unique_abbr": "OpenAI;UvA;UCI;CIFAR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0;0;0;0;1+0+2", "aff_country_unique": "United States;Netherlands;Canada" }, { "title": "Improving PAC Exploration Using the Median Of Means", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6929", "id": "6929", "author_site": "Jason Pazis, Ronald Parr, Jonathan How", "author": "Jason Pazis; Ronald E Parr; Jonathan P How", "abstract": "We present the first application of the median of means in a PAC exploration algorithm for MDPs. Using the median of means allows us to significantly reduce the dependence of our bounds on the range of values that the value function can take, while introducing a dependence on the (potentially much smaller) variance of the Bellman operator. Additionally, our algorithm is the first algorithm with PAC bounds that can be applied to MDPs with unbounded rewards.", "bibtex": "@inproceedings{NIPS2016_139f0874,\n author = {Pazis, Jason and Parr, Ronald E and How, Jonathan P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Improving PAC Exploration Using the Median Of Means},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/139f0874f2ded2e41b0393c4ac5644f7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/139f0874f2ded2e41b0393c4ac5644f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/139f0874f2ded2e41b0393c4ac5644f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/139f0874f2ded2e41b0393c4ac5644f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/139f0874f2ded2e41b0393c4ac5644f7-Reviews.html", "metareview": "", "pdf_size": 481324, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9186201026920239524&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Laboratory for Information and Decision Systems, Massachusetts Institute of Technology; Department of Computer Science, Duke University; Aerospace Controls Laboratory, Department of Aeronautics and Astronautics, Massachusetts Institute of Technology", "aff_domain": "mit.edu;cs.duke.edu;mit.edu", "email": "mit.edu;cs.duke.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/139f0874f2ded2e41b0393c4ac5644f7-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Massachusetts Institute of Technology;Duke University", "aff_unique_dep": "Laboratory for Information and Decision Systems;Department of Computer Science", "aff_unique_url": "https://web.mit.edu;https://www.duke.edu", "aff_unique_abbr": "MIT;Duke", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Incremental Boosting Convolutional Neural Network for Facial Action Unit Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7094", "id": "7094", "author_site": "Shizhong Han, Zibo Meng, AHMED-SHEHAB KHAN, Yan Tong", "author": "Shizhong Han; Zibo Meng; AHMED-SHEHAB KHAN; Yan Tong", "abstract": "Recognizing facial action units (AUs) from spontaneous facial expressions is still a challenging problem. Most recently, CNNs have shown promise on facial AU recognition. However, the learned CNNs often overfit and do not generalize well to unseen subjects due to limited AU-coded training images. We propose a novel Incremental Boosting CNN (IB-CNN) to integrate boosting into the CNN via an incremental boosting layer that selects discriminative neurons from the lower layer and is incrementally updated on successive mini-batches. In addition, a novel loss function that accounts for errors from both the incrementally boosted classifier and individual weak classifiers is proposed to fine-tune the IB-CNN. Experimental results on four benchmark AU databases have demonstrated that the IB-CNN yields significant improvement over the traditional CNN and the boosting CNN without incremental learning, as well as outperforming the state-of-the-art CNN-based methods in AU recognition. The improvement is more impressive for the AUs that have the lowest frequencies in the databases.", "bibtex": "@inproceedings{NIPS2016_d09bf415,\n author = {Han, Shizhong and Meng, Zibo and KHAN, AHMED-SHEHAB and Tong, Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Incremental Boosting Convolutional Neural Network for Facial Action Unit Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d09bf41544a3365a46c9077ebb5e35c3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d09bf41544a3365a46c9077ebb5e35c3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d09bf41544a3365a46c9077ebb5e35c3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d09bf41544a3365a46c9077ebb5e35c3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d09bf41544a3365a46c9077ebb5e35c3-Reviews.html", "metareview": "", "pdf_size": 593086, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7573542732464793344&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d09bf41544a3365a46c9077ebb5e35c3-Abstract.html" }, { "title": "Incremental Variational Sparse Gaussian Process Regression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7045", "id": "7045", "author_site": "Ching-An Cheng, Byron Boots", "author": "Ching-An Cheng; Byron Boots", "abstract": "Recent work on scaling up Gaussian process regression (GPR) to large datasets has primarily focused on sparse GPR, which leverages a small set of basis functions to approximate the full Gaussian process during inference. However, the majority of these approaches are batch methods that operate on the entire training dataset at once, precluding the use of datasets that are streaming or too large to fit into memory. Although previous work has considered incrementally solving variational sparse GPR, most algorithms fail to update the basis functions and therefore perform suboptimally. We propose a novel incremental learning algorithm for variational sparse GPR based on stochastic mirror ascent of probability densities in reproducing kernel Hilbert space. This new formulation allows our algorithm to update basis functions online in accordance with the manifold structure of probability densities for fast convergence. We conduct several experiments and show that our proposed approach achieves better empirical performance in terms of prediction error than the recent state-of-the-art incremental solutions to variational sparse GPR.", "bibtex": "@inproceedings{NIPS2016_2596a54c,\n author = {Cheng, Ching-An and Boots, Byron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Incremental Variational Sparse Gaussian Process Regression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2596a54cdbb555cfd09cd5d991da0f55-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2596a54cdbb555cfd09cd5d991da0f55-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2596a54cdbb555cfd09cd5d991da0f55-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2596a54cdbb555cfd09cd5d991da0f55-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2596a54cdbb555cfd09cd5d991da0f55-Reviews.html", "metareview": "", "pdf_size": 1440172, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5797309783775550148&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Institute for Robotics and Intelligent Machines, Georgia Institute of Technology; Institute for Robotics and Intelligent Machines, Georgia Institute of Technology", "aff_domain": "gatech.edu;cc.gatech.edu", "email": "gatech.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2596a54cdbb555cfd09cd5d991da0f55-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "Institute for Robotics and Intelligent Machines", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Atlanta", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Inference by Reparameterization in Neural Population Codes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7200", "id": "7200", "author_site": "Rajkumar Vasudeva Raju, Xaq Pitkow", "author": "Rajkumar Vasudeva Raju; Zachary Pitkow", "abstract": "Behavioral experiments on humans and animals suggest that the brain performs probabilistic inference to interpret its environment. Here we present a new general-purpose, biologically-plausible neural implementation of approximate inference. The neural network represents uncertainty using Probabilistic Population Codes (PPCs), which are distributed neural representations that naturally encode probability distributions, and support marginalization and evidence integration in a biologically-plausible manner. By connecting multiple PPCs together as a probabilistic graphical model, we represent multivariate probability distributions. Approximate inference in graphical models can be accomplished by message-passing algorithms that disseminate local information throughout the graph. An attractive and often accurate example of such an algorithm is Loopy Belief Propagation (LBP), which uses local marginalization and evidence integration operations to perform approximate inference efficiently even for complex models. Unfortunately, a subtle feature of LBP renders it neurally implausible. However, LBP can be elegantly reformulated as a sequence of Tree-based Reparameterizations (TRP) of the graphical model. We re-express the TRP updates as a nonlinear dynamical system with both fast and slow timescales, and show that this produces a neurally plausible solution. By combining all of these ideas, we show that a network of PPCs can represent multivariate probability distributions and implement the TRP updates to perform probabilistic inference. 
Simulations with Gaussian graphical models demonstrate that the neural network inference quality is comparable to the direct evaluation of LBP and robust to noise, and thus provides a promising mechanism for general probabilistic inference in the population codes of the brain.", "bibtex": "@inproceedings{NIPS2016_a26398dc,\n author = {Vasudeva Raju, Rajkumar and Pitkow, Zachary},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Inference by Reparameterization in Neural Population Codes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a26398dca6f47b49876cbaffbc9954f9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a26398dca6f47b49876cbaffbc9954f9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a26398dca6f47b49876cbaffbc9954f9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a26398dca6f47b49876cbaffbc9954f9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a26398dca6f47b49876cbaffbc9954f9-Reviews.html", "metareview": "", "pdf_size": 1453547, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14899537370606975657&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Department of ECE, Rice University; Dept. of Neuroscience, Baylor College of Medicine + Dept. of ECE, Rice University", "aff_domain": "rice.edu;rice.edu", "email": "rice.edu;rice.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a26398dca6f47b49876cbaffbc9954f9-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "Rice University;Baylor College of Medicine", "aff_unique_dep": "Department of Electrical and Computer Engineering;Dept. of Neuroscience", "aff_unique_url": "https://www.rice.edu;https://www.bcm.edu", "aff_unique_abbr": "Rice;BCM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United States" }, { "title": "Infinite Hidden Semi-Markov Modulated Interaction Point Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7391", "id": "7391", "author_site": "matt zhang, Peng Lin, Peng Lin, Ting Guo, Yang Wang, Yang Wang, Fang Chen", "author": "matt zhang; Peng Lin; Peng Lin; Ting Guo; Yang Wang; Yang Wang; Fang Chen", "abstract": "The correlation between events is ubiquitous and important for temporal events modelling. In many cases, the correlation exists between not only events' emitted observations, but also their arrival times. State space models (e.g., hidden Markov model) and stochastic interaction point process models (e.g., Hawkes process) have been studied extensively yet separately for the two types of correlations in the past. In this paper, we propose a Bayesian nonparametric approach that considers both types of correlations via unifying and generalizing hidden semi-Markov model and interaction point process model. The proposed approach can simultaneously model both the observations and arrival times of temporal events, and determine the number of latent states from data. A Metropolis-within-particle-Gibbs sampler with ancestor resampling is developed for efficient posterior inference. 
The approach is tested on both synthetic and real-world data with promising outcomes.", "bibtex": "@inproceedings{NIPS2016_8d55a249,\n author = {zhang, matt and Lin, Peng and Lin, Peng and Guo, Ting and Wang, Yang and Wang, Yang and Chen, Fang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Infinite Hidden Semi-Markov Modulated Interaction Point Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8d55a249e6baa5c06772297520da2051-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8d55a249e6baa5c06772297520da2051-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8d55a249e6baa5c06772297520da2051-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8d55a249e6baa5c06772297520da2051-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8d55a249e6baa5c06772297520da2051-Reviews.html", "metareview": "", "pdf_size": 551840, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2305058564441217954&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8d55a249e6baa5c06772297520da2051-Abstract.html" }, { "title": "InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7140", "id": "7140", "author_site": "Xi Chen, Peter Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, Pieter Abbeel", "author": "Xi Chen; Yan Duan; Rein Houthooft; John Schulman; Ilya Sutskever; Pieter Abbeel", "abstract": "This paper describes InfoGAN, an information-theoretic extension to the Generative Adversarial Network that is able to learn disentangled representations in a completely unsupervised manner. InfoGAN is a generative adversarial network that also maximizes the mutual information between a small subset of the latent variables and the observation. We derive a lower bound to the mutual information objective that can be optimized efficiently, and show that our training procedure can be interpreted as a variation of the Wake-Sleep algorithm. Specifically, InfoGAN successfully disentangles writing styles from digit shapes on the MNIST dataset, pose from lighting of 3D rendered images, and background digits from the central digit on the SVHN dataset. It also discovers visual concepts that include hair styles, presence/absence of eyeglasses, and emotions on the CelebA face dataset. Experiments show that InfoGAN learns interpretable representations that are competitive with representations learned by existing fully supervised methods.", "bibtex": "@inproceedings{NIPS2016_7c9d0b1f,\n author = {Chen, Xi and Duan, Yan and Houthooft, Rein and Schulman, John and Sutskever, Ilya and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Reviews.html", "metareview": "", "pdf_size": 3653047, "gs_citation": 5972, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14881367722116467754&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7c9d0b1f96aebd7b5eca8c3edaa19ebb-Abstract.html" }, { "title": "Integrated perception with recurrent multi-task neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7188", "id": "7188", "author_site": "Hakan Bilen, Andrea Vedaldi", "author": "Hakan Bilen; Andrea Vedaldi", "abstract": "Modern discriminative predictors have been shown to match natural intelligences in specific perceptual tasks in image classification, object and part detection, boundary extraction, etc. However, a major advantage that natural intelligences still have is that they work well for all perceptual problems together, solving them efficiently and coherently in an integrated manner. In order to capture some of these advantages in machine perception, we ask two questions: whether deep neural networks can learn universal image representations, useful not only for a single task but for all of them, and how the solutions to the different tasks can be integrated in this framework. We answer by proposing a new architecture, which we call multinet, in which not only deep image features are shared between tasks, but where tasks can interact in a recurrent manner by encoding the results of their analysis in a common shared representation of the data. In this manner, we show that the performance of individual tasks in standard benchmarks can be improved first by sharing features between them and then, more significantly, by integrating their solutions in the common representation.", "bibtex": "@inproceedings{NIPS2016_06409663,\n author = {Bilen, Hakan and Vedaldi, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Integrated perception with recurrent multi-task neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/06409663226af2f3114485aa4e0a23b4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/06409663226af2f3114485aa4e0a23b4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/06409663226af2f3114485aa4e0a23b4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/06409663226af2f3114485aa4e0a23b4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/06409663226af2f3114485aa4e0a23b4-Reviews.html", "metareview": "", "pdf_size": 186094, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8603719625177727046&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Visual Geometry Group, University of Oxford; Visual Geometry Group, University of Oxford", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/06409663226af2f3114485aa4e0a23b4-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "Visual Geometry Group", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Oxford", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "Interaction Networks for Learning about Objects, Relations and Physics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6908", "id": "6908", "author_site": "Peter Battaglia, Razvan Pascanu, Matthew Lai, Danilo Jimenez Rezende, koray kavukcuoglu", "author": "Peter Battaglia; Razvan Pascanu; Matthew Lai; Danilo Jimenez Rezende; koray kavukcuoglu", "abstract": "Reasoning about objects, relations, and physics is central to human intelligence, and a key goal of artificial intelligence. Here we introduce the interaction network, a model which can reason about how objects in complex systems interact, supporting dynamical predictions, as well as inferences about the abstract properties of the system. Our model takes graphs as input, performs object- and relation-centric reasoning in a way that is analogous to a simulation, and is implemented using deep neural networks. We evaluate its ability to reason about several challenging physical domains: n-body problems, rigid-body collision, and non-rigid dynamics. Our results show it can be trained to accurately simulate the physical trajectories of dozens of objects over thousands of time steps, estimate abstract quantities such as energy, and generalize automatically to systems with different numbers and configurations of objects and relations. Our interaction network implementation is the first general-purpose, learnable physics engine, and a powerful general framework for reasoning about objects and relations in a wide variety of complex real-world domains.", "bibtex": "@inproceedings{NIPS2016_3147da8a,\n author = {Battaglia, Peter and Pascanu, Razvan and Lai, Matthew and Jimenez Rezende, Danilo and kavukcuoglu, koray},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interaction Networks for Learning about Objects, Relations and Physics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3147da8ab4a0437c15ef51a5cc7f2dc4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3147da8ab4a0437c15ef51a5cc7f2dc4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3147da8ab4a0437c15ef51a5cc7f2dc4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3147da8ab4a0437c15ef51a5cc7f2dc4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3147da8ab4a0437c15ef51a5cc7f2dc4-Reviews.html", "metareview": "", "pdf_size": 1911277, "gs_citation": 1821, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17057667931848922246&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3147da8ab4a0437c15ef51a5cc7f2dc4-Abstract.html" }, { "title": "Interaction Screening: Efficient and Sample-Optimal Learning of Ising Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6989", "id": "6989", "author_site": "Marc Vuffray, Sidhant Misra, Andrey Lokhov, Michael Chertkov", "author": "Marc Vuffray; Sidhant Misra; Andrey Lokhov; Michael Chertkov", "abstract": "We consider the problem of learning the underlying graph of an unknown Ising model on p spins from a collection of i.i.d. samples generated from the model. We suggest a new estimator that is computationally efficient and requires a number of samples that is near-optimal with respect to the previously established information-theoretic lower bound. Our statistical estimator has a physical interpretation in terms of \"interaction screening\". The estimator is consistent and is efficiently implemented using convex optimization. We prove that with appropriate regularization, the estimator recovers the underlying graph using a number of samples that is logarithmic in the system size p and exponential in the maximum coupling-intensity and maximum node-degree.", "bibtex": "@inproceedings{NIPS2016_861dc9bd,\n author = {Vuffray, Marc and Misra, Sidhant and Lokhov, Andrey and Chertkov, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interaction Screening: Efficient and Sample-Optimal Learning of Ising Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Reviews.html", "metareview": "", "pdf_size": 327188, "gs_citation": 148, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4677268255154673001&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Theoretical Division T-4, Los Alamos National Laboratory, Los Alamos, NM 87545, USA; Theoretical Division T-5, Los Alamos National Laboratory, Los Alamos, NM 87545, USA; Center for Nonlinear Studies, Los Alamos National Laboratory, Los Alamos, NM 87545, USA + Skolkovo Institute of Science and Technology, 143026 Moscow, Russia; Theoretical Division T-4, Los Alamos National Laboratory, Los Alamos, NM 87545, USA + Center for Nonlinear Studies, Los Alamos National Laboratory, Los Alamos, NM 87545, USA + Skolkovo Institute of Science and Technology, 143026 Moscow, Russia", "aff_domain": "lanl.gov;lanl.gov;lanl.gov;lanl.gov", "email": "lanl.gov;lanl.gov;lanl.gov;lanl.gov", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/861dc9bd7f4e7dd3cccd534d0ae2a2e9-Abstract.html", "aff_unique_index": "0;0;0+1;0+0+1", "aff_unique_norm": "Los Alamos National Laboratory;Skolkovo Institute of Science and Technology", "aff_unique_dep": "Theoretical Division T-4;", "aff_unique_url": "https://www.lanl.gov;https://www.skoltech.ru", "aff_unique_abbr": "LANL;Skoltech", "aff_campus_unique_index": "0;0;0;0+0", "aff_campus_unique": "Los Alamos;", "aff_country_unique_index": "0;0;0+1;0+0+1", "aff_country_unique": "United States;Russian Federation" }, { "title": "Interpretable Distribution Features with Maximum Testing Power", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7425", "id": "7425", "author_site": "Wittawat Jitkrittum, Zolt\u00e1n Szab\u00f3, Kacper P Chwialkowski, Arthur Gretton", "author": "Wittawat Jitkrittum; Zolt\u00e1n Szab\u00f3; Kacper P Chwialkowski; Arthur Gretton", "abstract": "Two semimetrics on probability distributions are proposed, given as the sum of differences of expectations of analytic functions evaluated at spatial or frequency locations (i.e., features). The features are chosen so as to maximize the distinguishability of the distributions, by optimizing a lower bound on test power for a statistical test using these features. The result is a parsimonious and interpretable indication of how and where two distributions differ locally. An empirical estimate of the test power criterion converges with increasing sample size, ensuring the quality of the returned features. 
In real-world benchmarks on high-dimensional text and image data, linear-time tests using the proposed semimetrics achieve comparable performance to the state-of-the-art quadratic-time maximum mean discrepancy test, while returning human-interpretable features that explain the test results.", "bibtex": "@inproceedings{NIPS2016_0a09c884,\n author = {Jitkrittum, Wittawat and Szab\\'{o}, Zolt\\'{a}n and Chwialkowski, Kacper P and Gretton, Arthur},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interpretable Distribution Features with Maximum Testing Power},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0a09c8844ba8f0936c20bd791130d6b6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0a09c8844ba8f0936c20bd791130d6b6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0a09c8844ba8f0936c20bd791130d6b6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0a09c8844ba8f0936c20bd791130d6b6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0a09c8844ba8f0936c20bd791130d6b6-Reviews.html", "metareview": "", "pdf_size": 1654516, "gs_citation": 159, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8544087657554515485&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Gatsby Unit, University College London; Gatsby Unit, University College London; Gatsby Unit, University College London; Gatsby Unit, University College London", "aff_domain": "gmail.com;gmail.com;gmail.com;gmail.com", "email": "gmail.com;gmail.com;gmail.com;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0a09c8844ba8f0936c20bd791130d6b6-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "Gatsby Unit", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Interpretable Nonlinear Dynamic Modeling of Neural Trajectories", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7130", "id": "7130", "author_site": "Yuan Zhao, Il Memming Park", "author": "Yuan Zhao; Il Memming Park", "abstract": "A central challenge in neuroscience is understanding how a neural system implements computation through its dynamics. We propose a nonlinear time series model aimed at characterizing interpretable dynamics from neural trajectories. Our model assumes low-dimensional continuous dynamics in a finite volume. It incorporates a prior assumption about globally contractional dynamics to avoid overly enthusiastic extrapolation outside of the support of observed trajectories. We show that our model can recover qualitative features of the phase portrait such as attractors, slow points, and bifurcations, while also producing reliable long-term future predictions in a variety of dynamical models and in real neural data.", "bibtex": "@inproceedings{NIPS2016_b2531e7b,\n author = {Zhao, Yuan and Park, Il Memming},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Interpretable Nonlinear Dynamic Modeling of Neural Trajectories},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b2531e7bb29bf22e1daae486fae3417a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b2531e7bb29bf22e1daae486fae3417a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b2531e7bb29bf22e1daae486fae3417a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b2531e7bb29bf22e1daae486fae3417a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b2531e7bb29bf22e1daae486fae3417a-Reviews.html", "metareview": "", "pdf_size": 4788525, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8306689803380856932&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Neurobiology and Behavior; Department of Applied Mathematics and Statistics + Institute for Advanced Computational Science", "aff_domain": "stonybrook.edu;stonybrook.edu", "email": "stonybrook.edu;stonybrook.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b2531e7bb29bf22e1daae486fae3417a-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "Stony Brook University;Department of Applied Mathematics and Statistics", "aff_unique_dep": "Department of Neurobiology and Behavior;Applied Mathematics and Statistics", "aff_unique_url": "https://www.stonybrook.edu;", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "title": "Iterative Refinement of the Approximate Posterior for Directed Belief Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7038", "id": "7038", "author_site": "R Devon Hjelm, Russ Salakhutdinov, Kyunghyun Cho, Nebojsa Jojic, Vince Calhoun, Junyoung Chung", "author": "Devon Hjelm; Ruslan Salakhutdinov; Kyunghyun Cho; Nebojsa Jojic; Vince Calhoun; Junyoung Chung", "abstract": "Variational methods that rely on a recognition network to approximate the posterior of directed graphical models offer better inference and learning than previous methods. Recent advances that exploit the capacity and flexibility in this approach have expanded what kinds of models can be trained. However, as a proposal for the posterior, the capacity of the recognition network is limited, which can constrain the representational power of the generative model and increase the variance of Monte Carlo estimates. To address these issues, we introduce an iterative refinement procedure for improving the approximate posterior of the recognition network and show that training with the refined posterior is competitive with state-of-the-art methods. The advantages of refinement are further evident in an increased effective sample size, which implies a lower variance of gradient estimates.", "bibtex": "@inproceedings{NIPS2016_20c9f570,\n author = {Hjelm, Devon and Salakhutdinov, Russ R and Cho, Kyunghyun and Jojic, Nebojsa and Calhoun, Vince and Chung, Junyoung},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Iterative Refinement of the Approximate Posterior for Directed Belief Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/20c9f5700da1088260df60fcc5df2b53-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/20c9f5700da1088260df60fcc5df2b53-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/20c9f5700da1088260df60fcc5df2b53-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/20c9f5700da1088260df60fcc5df2b53-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/20c9f5700da1088260df60fcc5df2b53-Reviews.html", "metareview": "", "pdf_size": 552118, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6403979637394349378&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of New Mexico and the Mind Research Network; Courant Institute & Center for Data Science, New York University; University of Montreal; Carnegie Mellon University; University of New Mexico and the Mind Research Network; Microsoft Research", "aff_domain": "mrn.org;nyu.edu;umontreal.ca;cs.toronto.edu;mrn.org;microsoft.com", "email": "mrn.org;nyu.edu;umontreal.ca;cs.toronto.edu;mrn.org;microsoft.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/20c9f5700da1088260df60fcc5df2b53-Abstract.html", "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "University of New Mexico;New York University;University of Montreal;Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Courant Institute & Center for Data Science;;;Microsoft Research", "aff_unique_url": "https://www.unm.edu;https://www.nyu.edu;https://www.umontreal.ca;https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UNM;NYU;UM;CMU;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";New York", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "title": "Joint Line Segmentation and Transcription for End-to-End Handwritten Paragraph Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7157", "id": "7157", "author": "Theodore Bluche", "abstract": "Offline handwriting recognition systems require cropped text line images for both training and recognition. On the one hand, the annotation of position and transcript at line level is costly to obtain. On the other hand, automatic line segmentation algorithms are prone to errors, compromising the subsequent recognition. In this paper, we propose a modification of the popular and efficient Multi-Dimensional Long Short-Term Memory Recurrent Neural Networks (MDLSTM-RNNs) to enable end-to-end processing of handwritten paragraphs. More particularly, we replace the collapse layer transforming the two-dimensional representation into a sequence of predictions by a recurrent version which can select one line at a time. In the proposed model, a neural network performs a kind of implicit line segmentation by computing attention weights on the image representation. 
The experiments on paragraphs of the Rimes and IAM databases yield results that are competitive with those of networks trained at line level, and constitute a significant step towards end-to-end transcription of full documents.", "bibtex": "@inproceedings{NIPS2016_2bb232c0,\n author = {Bluche, Theodore},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint Line Segmentation and Transcription for End-to-End Handwritten Paragraph Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2bb232c0b13c774965ef8558f0fbd615-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2bb232c0b13c774965ef8558f0fbd615-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2bb232c0b13c774965ef8558f0fbd615-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2bb232c0b13c774965ef8558f0fbd615-Reviews.html", "metareview": "", "pdf_size": 2339819, "gs_citation": 253, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12483229472479638241&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "A2iA SAS", "aff_domain": "a2ia.com", "email": "a2ia.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2bb232c0b13c774965ef8558f0fbd615-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "A2iA SAS", "aff_unique_dep": "", "aff_unique_url": "https://www.a2ia.com", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "title": "Joint M-Best-Diverse Labelings as a Parametric Submodular Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7159", "id": "7159", "author_site": "Alexander Kirillov, Alexander Shekhovtsov, Carsten Rother, Bogdan Savchynskyy", "author": "Alexander Kirillov; Alexander Shekhovtsov; Carsten Rother; Bogdan Savchynskyy", "abstract": "We consider the problem of jointly inferring the $M$-best diverse labelings for a binary (high-order) submodular energy of a graphical model. Recently, it was shown that this problem can be solved to a global optimum, for many practically interesting diversity measures. It was noted that the labelings are, so-called, nested. This nestedness property also holds for labelings of a class of parametric submodular minimization problems, where different values of the global parameter $\\gamma$ give rise to different solutions. The popular example of the parametric submodular minimization is the monotonic parametric max-flow problem, which is also widely used for computing multiple labelings. As the main contribution of this work we establish a close relationship between diversity with submodular energies and the parametric submodular minimization. In particular, the joint $M$-best diverse labelings can be obtained by running a non-parametric submodular minimization (in the special case - max-flow) solver for $M$ different values of $\\gamma$ in parallel, for certain diversity measures. Importantly, the values for~$\\gamma$ can be computed in a closed form in advance, prior to any optimization. These theoretical results suggest two simple yet efficient algorithms for the joint $M$-best diverse problem, which outperform competitors in terms of runtime and quality of results. 
In particular, as we show in the paper, the new methods compute the exact $M$-best diverse labelings faster than a popular method of Batra et al., which in some sense only obtains approximate solutions.", "bibtex": "@inproceedings{NIPS2016_979d472a,\n author = {Kirillov, Alexander and Shekhovtsov, Alexander and Rother, Carsten and Savchynskyy, Bogdan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint M-Best-Diverse Labelings as a Parametric Submodular Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/979d472a84804b9f647bc185a877a8b5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/979d472a84804b9f647bc185a877a8b5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/979d472a84804b9f647bc185a877a8b5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/979d472a84804b9f647bc185a877a8b5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/979d472a84804b9f647bc185a877a8b5-Reviews.html", "metareview": "", "pdf_size": 396585, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12873460311224911320&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "TU Dresden, Dresden, Germany+TU Graz, Graz, Austria; TU Graz, Graz, Austria; TU Dresden, Dresden, Germany; TU Dresden, Dresden, Germany", "aff_domain": "tu-dresden.de; ;tu-dresden.de;tu-dresden.de", "email": "tu-dresden.de; ;tu-dresden.de;tu-dresden.de", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/979d472a84804b9f647bc185a877a8b5-Abstract.html", "aff_unique_index": "0+1;1;0;0", "aff_unique_norm": "Technische Universit\u00e4t Dresden;Graz University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-dresden.de;https://www.tugraz.at", "aff_unique_abbr": "TUD;TU Graz", "aff_campus_unique_index": "0+1;1;0;0", "aff_campus_unique": "Dresden;Graz", "aff_country_unique_index": "0+1;1;0;0", "aff_country_unique": "Germany;Austria" }, { "title": "Joint quantile regression in vector-valued RKHSs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7126", "id": "7126", "author_site": "Maxime Sangnier, Olivier Fercoq, Florence d'Alch\u00e9-Buc", "author": "Maxime Sangnier; Olivier Fercoq; Florence d'Alch\u00e9-Buc", "abstract": "To give a more complete picture than the average relationship provided by standard regression, a novel framework for simultaneously estimating and predicting several conditional quantiles is introduced. The proposed methodology leverages kernel-based multi-task learning to curb the embarrassing phenomenon of quantile crossing, with a one-step estimation procedure and no post-processing. Moreover, this framework comes with theoretical guarantees and an efficient coordinate descent learning algorithm. Numerical experiments on benchmark and real datasets highlight the enhancements of our approach regarding the prediction error, the crossing occurrences and the training time.", "bibtex": "@inproceedings{NIPS2016_dfce0680,\n author = {Sangnier, Maxime and Fercoq, Olivier and d\\textquotesingle Alch\\'{e}-Buc, Florence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Joint quantile regression in vector-valued RKHSs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dfce06801e1a85d6d06f1fdd4475dacd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dfce06801e1a85d6d06f1fdd4475dacd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dfce06801e1a85d6d06f1fdd4475dacd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dfce06801e1a85d6d06f1fdd4475dacd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dfce06801e1a85d6d06f1fdd4475dacd-Reviews.html", "metareview": "", "pdf_size": 954844, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16882966614398410139&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "LTCI, CNRS, T\u00e9l\u00e9com ParisTech; LTCI, CNRS, T\u00e9l\u00e9com ParisTech; LTCI, CNRS, T\u00e9l\u00e9com ParisTech", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "email": "telecom-paristech.fr;telecom-paristech.fr;telecom-paristech.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dfce06801e1a85d6d06f1fdd4475dacd-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "CNRS", "aff_unique_dep": "LTCI", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Kernel Bayesian Inference with Posterior Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7360", "id": "7360", "author_site": "Yang Song, Jun Zhu, Yong Ren", "author": "Yang Song; Jun Zhu; Yong Ren", "abstract": "We propose a vector-valued regression problem whose solution is equivalent to the reproducing kernel Hilbert space (RKHS) embedding of the Bayesian posterior distribution. This equivalence provides a new understanding of kernel Bayesian inference. Moreover, the optimization problem induces a new regularization for the posterior embedding estimator, which is faster and has comparable performance to the squared regularization in kernel Bayes' rule. This regularization coincides with a former thresholding approach used in kernel POMDPs whose consistency remains to be established. Our theoretical work solves this open problem and provides consistency analysis in regression settings. Based on our optimization formulation, we propose a flexible Bayesian posterior regularization framework which for the first time enables us to put regularization at the distribution level. We apply this method to nonparametric state-space filtering tasks with extremely nonlinear dynamics and show performance gains over all other baselines.", "bibtex": "@inproceedings{NIPS2016_e139c454,\n author = {Song, Yang and Zhu, Jun and Ren, Yong},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Kernel Bayesian Inference with Posterior Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e139c454239bfde741e893edb46a06cc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e139c454239bfde741e893edb46a06cc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e139c454239bfde741e893edb46a06cc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e139c454239bfde741e893edb46a06cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e139c454239bfde741e893edb46a06cc-Reviews.html", "metareview": "", "pdf_size": 942006, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12376421941397782277&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 14, "aff": "Dept. of Physics, Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., TNList Lab+Center for Bio-Inspired Computing Research+State Key Lab for Intell. Tech. & Systems, Tsinghua University, Beijing, China; Dept. of Comp. Sci. & Tech., TNList Lab+Center for Bio-Inspired Computing Research+State Key Lab for Intell. Tech. & Systems, Tsinghua University, Beijing, China", "aff_domain": "cs.stanford.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn", "email": "cs.stanford.edu;tsinghua.edu.cn;mails.tsinghua.edu.cn", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e139c454239bfde741e893edb46a06cc-Abstract.html", "aff_unique_index": "0;0+1+0;0+1+0", "aff_unique_norm": "Tsinghua University;Center for Bio-Inspired Computing Research", "aff_unique_dep": "Dept. of Physics;", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0+0;0+0", "aff_country_unique": "China;" }, { "title": "Kernel Observers: Systems-Theoretic Modeling and Inference of Spatiotemporally Evolving Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7004", "id": "7004", "author_site": "Hassan A Kingravi, Harshal R Maske, Girish Chowdhary", "author": "Hassan A Kingravi; Harshal R Maske; Girish Chowdhary", "abstract": "We consider the problem of estimating the latent state of a spatiotemporally evolving continuous function using very few sensor measurements. We show that layering a dynamical systems prior over temporal evolution of weights of a kernel model is a valid approach to spatiotemporal modeling that does not necessarily require the design of complex nonstationary kernels. Furthermore, we show that such a predictive model can be utilized to determine sensing locations that guarantee that the hidden state of the phenomena can be recovered with very few measurements. We provide sufficient conditions on the number and spatial location of samples required to guarantee state recovery, and provide a lower bound on the minimum number of samples required to robustly infer the hidden states. Our approach outperforms existing methods in numerical experiments.", "bibtex": "@inproceedings{NIPS2016_9d7311ba,\n author = {Kingravi, Hassan A and Maske, Harshal R and Chowdhary, Girish},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Kernel Observers: Systems-Theoretic Modeling and Inference of Spatiotemporally Evolving Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9d7311ba459f9e45ed746755a32dcd11-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9d7311ba459f9e45ed746755a32dcd11-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9d7311ba459f9e45ed746755a32dcd11-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9d7311ba459f9e45ed746755a32dcd11-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9d7311ba459f9e45ed746755a32dcd11-Reviews.html", "metareview": "", "pdf_size": 1276806, "gs_citation": 25, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9806873527265159800&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Pindrop; University of Illinois at Urbana Champaign; University of Illinois at Urbana Champaign", "aff_domain": "pindrop.com;illinois.edu;illinois.edu", "email": "pindrop.com;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9d7311ba459f9e45ed746755a32dcd11-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Pindrop;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.pindrop.com;https://illinois.edu", "aff_unique_abbr": ";UIUC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Kronecker Determinantal Point Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7235", "id": "7235", "author_site": "Zelda Mariet, Suvrit Sra", "author": "Zelda E. Mariet; Suvrit Sra", "abstract": "Determinantal Point Processes (DPPs) are probabilistic models over all subsets of a ground set of N items. They have recently gained prominence in several applications that rely on diverse subsets. However, their applicability to large problems is still limited due to O(N^3) complexity of core tasks such as sampling and learning. We enable efficient sampling and learning for DPPs by introducing KronDPP, a DPP model whose kernel matrix decomposes as a tensor product of multiple smaller kernel matrices. This decomposition immediately enables fast exact sampling. But contrary to what one may expect, leveraging the Kronecker product structure for speeding up DPP learning turns out to be more difficult. We overcome this challenge, and derive batch and stochastic optimization algorithms for efficiently learning the parameters of a KronDPP.", "bibtex": "@inproceedings{NIPS2016_bad5f337,\n author = {Mariet, Zelda E. and Sra, Suvrit},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Kronecker Determinantal Point Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bad5f33780c42f2588878a9d07405083-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bad5f33780c42f2588878a9d07405083-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bad5f33780c42f2588878a9d07405083-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bad5f33780c42f2588878a9d07405083-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bad5f33780c42f2588878a9d07405083-Reviews.html", "metareview": "", "pdf_size": 172031, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8316595930825480998&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Massachusetts Institute of Technology; Massachusetts Institute of Technology", "aff_domain": "csail.mit.edu;mit.edu", "email": "csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bad5f33780c42f2588878a9d07405083-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Ladder Variational Autoencoders", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7353", "id": "7353", "author_site": "Casper Kaae S\u00f8nderby, Tapani Raiko, Lars Maal\u00f8e, S\u00f8ren Kaae S\u00f8nderby, Ole Winther", "author": "Casper Kaae S\u00f8nderby; Tapani Raiko; Lars Maal\u00f8e; S\u00f8ren Kaae S\u00f8nderby; Ole Winther", "abstract": "Variational autoencoders are powerful models for unsupervised learning. However, deep models with several layers of dependent stochastic variables are difficult to train, which limits the improvements obtained using these highly expressive models. We propose a new inference model, the Ladder Variational Autoencoder, that recursively corrects the generative distribution by a data dependent approximate likelihood in a process resembling the recently proposed Ladder Network. We show that this model provides state of the art predictive log-likelihood and a tighter log-likelihood lower bound compared to the purely bottom-up inference in layered Variational Autoencoders and other generative models. We provide a detailed analysis of the learned hierarchical latent representation and show that our new inference model is qualitatively different and utilizes a deeper, more distributed hierarchy of latent variables. Finally, we observe that batch-normalization and deterministic warm-up (gradually turning on the KL-term) are crucial for training variational models with many stochastic layers.", "bibtex": "@inproceedings{NIPS2016_6ae07dcb,\n author = {S\\o nderby, Casper Kaae and Raiko, Tapani and Maal\\o e, Lars and S\\o nderby, S\\o ren Kaae and Winther, Ole},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Ladder Variational Autoencoders},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6ae07dcb33ec3b7c814df797cbda0f87-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6ae07dcb33ec3b7c814df797cbda0f87-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6ae07dcb33ec3b7c814df797cbda0f87-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6ae07dcb33ec3b7c814df797cbda0f87-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6ae07dcb33ec3b7c814df797cbda0f87-Reviews.html", "metareview": "", "pdf_size": 2645006, "gs_citation": 1058, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1323199474868567922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Bioinformatics Centre, Department of Biology, University of Copenhagen, Denmark; Department of Computer Science, Aalto University, Finland; Department of Applied Mathematics and Computer Science, Technical University of Denmark; Bioinformatics Centre, Department of Biology, University of Copenhagen, Denmark; Bioinformatics Centre, Department of Biology, University of Copenhagen, Denmark+Department of Applied Mathematics and Computer Science, Technical University of Denmark", "aff_domain": "gmail.com;aalto.fi;dtu.dk;gmail.com;dtu.dk", "email": "gmail.com;aalto.fi;dtu.dk;gmail.com;dtu.dk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6ae07dcb33ec3b7c814df797cbda0f87-Abstract.html", "aff_unique_index": "0;1;2;0;0+2", "aff_unique_norm": "University of Copenhagen;Aalto University;Technical University of Denmark", "aff_unique_dep": "Department of Biology;Department of Computer Science;Department of Applied Mathematics and Computer Science", "aff_unique_url": "https://www.ku.dk;https://www.aalto.fi;https://www.dtu.dk", "aff_unique_abbr": "UCPH;Aalto;DTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0+0", "aff_country_unique": "Denmark;Finland" }, { "title": "Large Margin Discriminant Dimensionality Reduction in Prediction Space", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7263", "id": "7263", "author_site": "Ehsan Saberian, Jose Costa Pereira, Nuno Nvasconcelos, Can Xu", "author": "Mohammad Saberian; Jose Costa Pereira; Can Xu; Jian Yang; Nuno Nvasconcelos", "abstract": "In this paper we establish a duality between boosting and SVM, and use this to derive a novel discriminant dimensionality reduction algorithm. In particular, using the multiclass formulation of boosting and SVM we note that both use a combination of mapping and linear classification to maximize the multiclass margin. In SVM this is implemented using a pre-defined mapping (induced by the kernel) and optimizing the linear classifiers. In boosting the linear classifiers are pre-defined and the mapping (predictor) is learned through combination of weak learners. We argue that the intermediate mapping, e.g. boosting predictor, is preserving the discriminant aspects of the data and by controlling the dimension of this mapping it is possible to achieve discriminant low dimensional representations for the data. 
We use the aforementioned duality and propose a new method, Large Margin Discriminant Dimensionality Reduction (LADDER) that jointly learns the mapping and the linear classifiers in an efficient manner. This leads to a data-driven mapping which can embed data into any number of dimensions. Experimental results show that this embedding can significantly improve performance on tasks such as hashing and image/scene classification.", "bibtex": "@inproceedings{NIPS2016_632cee94,\n author = {Saberian, Mohammad and Costa Pereira, Jose and Xu, Can and Yang, Jian and Nvasconcelos, Nuno},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Large Margin Discriminant Dimensionality Reduction in Prediction Space},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/632cee946db83e7a52ce5e8d6f0fed35-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/632cee946db83e7a52ce5e8d6f0fed35-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/632cee946db83e7a52ce5e8d6f0fed35-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/632cee946db83e7a52ce5e8d6f0fed35-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/632cee946db83e7a52ce5e8d6f0fed35-Reviews.html", "metareview": "", "pdf_size": 539258, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12433004185989912662&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Netflix; INESCTEC; Google; Yahoo Research; UC San Diego", "aff_domain": "netflix.com;inesctec.pt;google.com;yahoo-inc.com;ucsd.edu", "email": "netflix.com;inesctec.pt;google.com;yahoo-inc.com;ucsd.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/632cee946db83e7a52ce5e8d6f0fed35-Abstract.html", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Netflix;INESCTEC;Google;Yahoo;University of California, San Diego", "aff_unique_dep": ";;Google;Yahoo Research;", "aff_unique_url": "https://www.netflix.com;https://www.inesctec.pt;https://www.google.com;https://research.yahoo.com;https://www.ucsd.edu", "aff_unique_abbr": "Netflix;;Google;Yahoo Research;UCSD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;San Diego", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;Portugal" }, { "title": "Large-Scale Price Optimization via Network Flow", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7419", "id": "7419", "author_site": "Shinji Ito, Ryohei Fujimaki", "author": "Shinji Ito; Ryohei Fujimaki", "abstract": "This paper deals with price optimization, which is to find the best pricing strategy that maximizes revenue or profit, on the basis of demand forecasting models. Though recent advances in regression technologies have made it possible to reveal price-demand relationship of a number of multiple products, most existing price optimization methods, such as mixed integer programming formulation, cannot handle tens or hundreds of products because of their high computational costs. To cope with this problem, this paper proposes a novel approach based on network flow algorithms. We reveal a connection between supermodularity of the revenue and cross elasticity of demand. 
On the basis of this connection, we propose an efficient algorithm that employs network flow algorithms. The proposed algorithm can handle hundreds or thousands of products, and returns an exact optimal solution under an assumption regarding cross elasticity of demand. Even in cases in which the assumption does not hold, the proposed algorithm can efficiently find approximate solutions as good as those of other state-of-the-art methods, as empirical results show.", "bibtex": "@inproceedings{NIPS2016_cb79f8fa,\n author = {Ito, Shinji and Fujimaki, Ryohei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Large-Scale Price Optimization via Network Flow},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cb79f8fa58b91d3af6c9c991f63962d3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cb79f8fa58b91d3af6c9c991f63962d3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/cb79f8fa58b91d3af6c9c991f63962d3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cb79f8fa58b91d3af6c9c991f63962d3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cb79f8fa58b91d3af6c9c991f63962d3-Reviews.html", "metareview": "", "pdf_size": 436214, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5228434827708699217&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "NEC Corporation; NEC Corporation", "aff_domain": "me.jp.nec.com;nec-labs.com", "email": "me.jp.nec.com;nec-labs.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cb79f8fa58b91d3af6c9c991f63962d3-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Latent Attention For If-Then Program Synthesis", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7278", "id": "7278", "author_site": "Chang Liu, Xinyun Chen, Richard Shin, Mingcheng Chen, Dawn Song", "author": "Chang Liu; Xinyun Chen; Eui Chul Shin; Mingcheng Chen; Dawn Song", "abstract": "Automatic translation from natural language descriptions into programs is a long-standing challenging problem. In this work, we consider a simple yet important sub-problem: translation from textual descriptions to If-Then programs. We devise a novel neural network architecture for this task which we train end-to-end. Specifically, we introduce Latent Attention, which computes multiplicative weights for the words in the description in a two-stage process with the goal of better leveraging the natural language structures that indicate the relevant parts for predicting program elements. Our architecture reduces the error rate by 28.57% compared to prior art. We also propose a one-shot learning scenario of If-Then program synthesis and simulate it with our existing dataset. 
We demonstrate a variation on the training procedure for this scenario that outperforms the original procedure, significantly closing the gap to the model trained with all data.", "bibtex": "@inproceedings{NIPS2016_716e1b8c,\n author = {Liu, Chang and Chen, Xinyun and Shin, Eui Chul and Chen, Mingcheng and Song, Dawn},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Latent Attention For If-Then Program Synthesis},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/716e1b8c6cd17b771da77391355749f3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/716e1b8c6cd17b771da77391355749f3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/716e1b8c6cd17b771da77391355749f3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/716e1b8c6cd17b771da77391355749f3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/716e1b8c6cd17b771da77391355749f3-Reviews.html", "metareview": "", "pdf_size": 1062786, "gs_citation": 108, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11037733463062803239&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Shanghai Jiao Tong University + UC Berkeley; UC Berkeley; UC Berkeley; UC Berkeley; UIUC + UC Berkeley", "aff_domain": "sjtu.edu.cn;berkeley.edu;berkeley.edu;berkeley.edu;uiuc.edu", "email": "sjtu.edu.cn;berkeley.edu;berkeley.edu;berkeley.edu;uiuc.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/716e1b8c6cd17b771da77391355749f3-Abstract.html", "aff_unique_index": "0+1;1;1;1;2+1", "aff_unique_norm": "Shanghai Jiao Tong University;University of California, Berkeley;University of Illinois Urbana-Champaign", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.berkeley.edu;https://www.illinois.edu", "aff_unique_abbr": "SJTU;UC Berkeley;UIUC", "aff_campus_unique_index": "1;1;1;1;2+1", "aff_campus_unique": ";Berkeley;Urbana-Champaign", "aff_country_unique_index": "0+1;1;1;1;1+1", "aff_country_unique": "China;United States" }, { "title": "Launch and Iterate: Reducing Prediction Churn", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7242", "id": "7242", "author_site": "Mahdi Milani Fard, Quentin Cormier, Kevin Canini, Maya Gupta", "author": "Mahdi Milani Fard; Quentin Cormier; Kevin Canini; Maya Gupta", "abstract": "Practical applications of machine learning often involve successive training iterations with changes to features and training examples. Ideally, changes in the output of any new model should only be improvements (wins) over the previous iteration, but in practice the predictions may change neutrally for many examples, resulting in extra net-zero wins and losses, referred to as unnecessary churn. These changes in the predictions are problematic for usability for some applications, and make it harder and more expensive to measure whether a change is a statistically significant improvement. In this paper, we formulate the problem and present a stabilization operator to regularize a classifier towards a previous classifier. We use a Markov chain Monte Carlo stabilization operator to produce a model with more consistent predictions without adversely affecting accuracy. 
We investigate the properties of the proposal with theoretical analysis. Experiments on benchmark datasets for different classification algorithms demonstrate the method and the resulting reduction in churn.", "bibtex": "@inproceedings{NIPS2016_dc5c768b,\n author = {Milani Fard, Mahdi and Cormier, Quentin and Canini, Kevin and Gupta, Maya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Launch and Iterate: Reducing Prediction Churn},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dc5c768b5dc76a084531934b34601977-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dc5c768b5dc76a084531934b34601977-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dc5c768b5dc76a084531934b34601977-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dc5c768b5dc76a084531934b34601977-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dc5c768b5dc76a084531934b34601977-Reviews.html", "metareview": "", "pdf_size": 284284, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11914564240390359456&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dc5c768b5dc76a084531934b34601977-Abstract.html" }, { "title": "LazySVD: Even Faster SVD Decomposition Yet Without Agonizing Pain", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7160", "id": "7160", "author_site": "Zeyuan Allen-Zhu, Yuanzhi Li", "author": "Zeyuan Allen-Zhu; Yuanzhi Li", "abstract": "We study k-SVD that is to obtain the first k singular vectors of a matrix A. Recently, a few breakthroughs have been discovered on k-SVD: Musco and Musco [1] proved the first gap-free convergence result using the block Krylov method, Shamir [2] discovered the first variance-reduction stochastic method, and Bhojanapalli et al. [3] provided the fastest $O(\\mathsf{nnz}(A) + \\mathsf{poly}(1/\\varepsilon))$-time algorithm using alternating minimization.\r\n\r\nIn this paper, we put forward a new and simple LazySVD framework to improve the above breakthroughs. This framework leads to a faster gap-free method outperforming [1], and the first accelerated and stochastic method outperforming [2]. In the $O(\\mathsf{nnz}(A) + \\mathsf{poly}(1/\\varepsilon))$ running-time regime, LazySVD outperforms [3] in certain parameter regimes without even using alternating minimization.", "bibtex": "@inproceedings{NIPS2016_c6e19e83,\n author = {Allen-Zhu, Zeyuan and Li, Yuanzhi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {LazySVD: Even Faster SVD Decomposition Yet Without Agonizing Pain},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c6e19e830859f2cb9f7c8f8cacb8d2a6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c6e19e830859f2cb9f7c8f8cacb8d2a6-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c6e19e830859f2cb9f7c8f8cacb8d2a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c6e19e830859f2cb9f7c8f8cacb8d2a6-Reviews.html", "metareview": "", "pdf_size": 1074194, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8860005667593543586&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Institute for Advanced Study + Princeton University; Princeton University", "aff_domain": "csail.mit.edu;cs.princeton.edu", "email": "csail.mit.edu;cs.princeton.edu", "github": "", "project": "https://arxiv.org/abs/1607.03463", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c6e19e830859f2cb9f7c8f8cacb8d2a6-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Institute for Advanced Study;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://ias.edu;https://www.princeton.edu", "aff_unique_abbr": "IAS;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "Learnable Visual Markers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7127", "id": "7127", "author_site": "Oleg Grinchuk, Vadim Lebedev, Victor Lempitsky", "author": "Oleg Grinchuk; Vadim Lebedev; Victor Lempitsky", "abstract": "We propose a new approach to designing visual markers (analogous to QR-codes, markers for augmented reality, and robotic fiducial tags) based on the advances in deep generative networks. In our approach, the markers are obtained as color images synthesized by a deep network from input bit strings, whereas another deep network is trained to recover the bit strings back from the photos of these markers. The two networks are trained simultaneously in a joint backpropagation process that takes characteristic photometric and geometric distortions associated with marker fabrication and capture into account. Additionally, a stylization loss based on statistics of activations in a pretrained classification network can be inserted into the learning in order to shift the marker appearance towards some texture prototype. In the experiments, we demonstrate that the markers obtained using our approach are capable of retaining bit strings that are long enough to be practical. The ability to automatically adapt markers according to the usage scenario and the desired capacity as well as the ability to combine information encoding with artistic stylization are the unique properties of our approach. As a byproduct, our approach provides an insight on the structure of patterns that are most suitable for recognition by ConvNets and on their ability to distinguish composite patterns.", "bibtex": "@inproceedings{NIPS2016_2d405b36,\n author = {Grinchuk, Oleg and Lebedev, Vadim and Lempitsky, Victor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learnable Visual Markers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2d405b367158e3f12d7c1e31a96b3af3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2d405b367158e3f12d7c1e31a96b3af3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2d405b367158e3f12d7c1e31a96b3af3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2d405b367158e3f12d7c1e31a96b3af3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2d405b367158e3f12d7c1e31a96b3af3-Reviews.html", "metareview": "", "pdf_size": 1072162, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1831198484448477788&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2d405b367158e3f12d7c1e31a96b3af3-Abstract.html" }, { "title": "Learned Region Sparsity and Diversity Also Predicts Visual Attention", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7192", "id": "7192", "author_site": "Zijun Wei, Hossein Adeli, Minh Hoai Nguyen, Greg Zelinsky, Dimitris Samaras", "author": "Zijun Wei; Hossein Adeli; Minh Hoai Nguyen; Greg Zelinsky; Dimitris Samaras", "abstract": "Learned region sparsity has achieved state-of-the-art performance in classification tasks by exploiting and integrating a sparse set of local information into global decisions. The underlying mechanism resembles how people sample information from an image with their eye movements when making similar decisions. In this paper we incorporate the biologically plausible mechanism of Inhibition of Return into the learned region sparsity model, thereby imposing diversity on the selected regions. We investigate how these mechanisms of sparsity and diversity relate to visual attention by testing our model on three different types of visual search tasks. We report state-of-the-art results in predicting the locations of human gaze fixations, even though our model is trained only on image-level labels without object location annotations. Notably, the classification performance of the extended model remains the same as the original. This work suggests a new computational perspective on visual attention mechanisms and shows how the inclusion of attention-based mechanisms can improve computer vision techniques.", "bibtex": "@inproceedings{NIPS2016_a0e2a2c5,\n author = {Wei, Zijun and Adeli, Hossein and Nguyen, Minh Hoai and Zelinsky, Greg and Samaras, Dimitris},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learned Region Sparsity and Diversity Also Predicts Visual Attention},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a0e2a2c563d57df27213ede1ac4ac780-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a0e2a2c563d57df27213ede1ac4ac780-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a0e2a2c563d57df27213ede1ac4ac780-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a0e2a2c563d57df27213ede1ac4ac780-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a0e2a2c563d57df27213ede1ac4ac780-Reviews.html", "metareview": "", "pdf_size": 7362885, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9261300074428482202&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff": "Department of Computer Science + Department of Psychology \u2013 Stony Brook University; Department of Computer Science + Department of Psychology \u2013 Stony Brook University; Department of Computer Science + Department of Psychology \u2013 Stony Brook University; Department of Computer Science; Department of Computer Science", "aff_domain": "cs.stonybrook.edu;stonybrook.edu;stonybrook.edu;cs.stonybrook.edu;cs.stonybrook.edu", "email": "cs.stonybrook.edu;stonybrook.edu;stonybrook.edu;cs.stonybrook.edu;cs.stonybrook.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a0e2a2c563d57df27213ede1ac4ac780-Abstract.html", "aff_unique_index": "0+1;0+1;0+1;0;0", "aff_unique_norm": "Unknown Institution;Stony Brook University", "aff_unique_dep": "Department of Computer Science;Department of Psychology", "aff_unique_url": ";https://www.stonybrook.edu", "aff_unique_abbr": ";SBU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stony Brook", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";United States" }, { "title": "Learning Additive Exponential Family Graphical Models via $\\ell_{2,1}$-norm Regularized M-Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7166", "id": "7166", "author_site": "Xiaotong Yuan, Ping Li, Tong Zhang, Qingshan Liu, Guangcan Liu", "author": "Xiaotong Yuan; Ping Li; Tong Zhang; Qingshan Liu; Guangcan Liu", "abstract": "We investigate a subclass of exponential family graphical models of which the sufficient statistics are defined by arbitrary additive forms. We propose two $\\ell_{2,1}$-norm regularized maximum likelihood estimators to learn the model parameters from i.i.d. samples. The first one is a joint MLE estimator which estimates all the parameters simultaneously. The second one is a node-wise conditional MLE estimator which estimates the parameters for each node individually. For both estimators, statistical analysis shows that under mild conditions the extra flexibility gained by the additive exponential family models comes at almost no cost of statistical efficiency. A Monte-Carlo approximation method is developed to efficiently optimize the proposed estimators. The advantages of our estimators over Gaussian graphical models and Nonparanormal estimators are demonstrated on synthetic and real data sets.", "bibtex": "@inproceedings{NIPS2016_ee26fc66,\n author = {Yuan, Xiaotong and Li, Ping and Zhang, Tong and Liu, Qingshan and Liu, Guangcan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Additive Exponential Family Graphical Models via $\\ell_{2,1}$-norm Regularized M-Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ee26fc66b1369c7625333bedafbfcaf6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ee26fc66b1369c7625333bedafbfcaf6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ee26fc66b1369c7625333bedafbfcaf6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ee26fc66b1369c7625333bedafbfcaf6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ee26fc66b1369c7625333bedafbfcaf6-Reviews.html", "metareview": "", "pdf_size": 141902, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=556563453549070710&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "B-DAT Lab, Nanjing University of Info. Sci.&Tech.; Depart. of Statistics, Rutgers University + Depart. of Computer Science, Rutgers University; Depart. of Statistics, Rutgers University; B-DAT Lab, Nanjing University of Info. Sci.&Tech.; B-DAT Lab, Nanjing University of Info. Sci.&Tech.", "aff_domain": "nuist.edu.cn;stat.rutgers.edu;stat.rutgers.edu;nuist.edu.cn;nuist.edu.cn", "email": "nuist.edu.cn;stat.rutgers.edu;stat.rutgers.edu;nuist.edu.cn;nuist.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ee26fc66b1369c7625333bedafbfcaf6-Abstract.html", "aff_unique_index": "0;1+1;1;0;0", "aff_unique_norm": "Nanjing University of Information Science & Technology;Rutgers University", "aff_unique_dep": "B-DAT Lab;Department of Statistics", "aff_unique_url": "http://www.nuist.edu.cn;https://www.rutgers.edu", "aff_unique_abbr": "NUIST;Rutgers", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+1;1;0;0", "aff_country_unique": "China;United States" }, { "title": "Learning Bayesian networks with ancestral constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7272", "id": "7272", "author_site": "Eunice Yuh-Jie Chen, Yujia Shen, Arthur Choi, Adnan Darwiche", "author": "Eunice Yuh-Jie Chen; Yujia Shen; Arthur Choi; Adnan Darwiche", "abstract": "We consider the problem of learning Bayesian networks optimally, when subject to background knowledge in the form of ancestral constraints. Our approach is based on a recently proposed framework for optimal structure learning based on non-decomposable scores, which is general enough to accommodate ancestral constraints. The proposed framework exploits oracles for learning structures using decomposable scores, which cannot accommodate ancestral constraints since they are non-decomposable. We show how to empower these oracles by passing them decomposable constraints that they can handle, which are inferred from ancestral constraints that they cannot handle. Empirically, we demonstrate that our approach can be orders-of-magnitude more efficient than alternative frameworks, such as those based on integer linear programming.", "bibtex": "@inproceedings{NIPS2016_144a3f71,\n author = {Chen, Eunice Yuh-Jie and Shen, Yujia and Choi, Arthur and Darwiche, Adnan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
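As a side note on the $\ell_{2,1}$ penalty used by both estimators of the "Learning Additive Exponential Family Graphical Models" entry above: its proximal operator is block soft-thresholding, sketched below. This is a minimal NumPy illustration under the assumption that each row of the parameter matrix is one group; it is not the paper's Monte-Carlo approximation method:

```python
import numpy as np

def prox_l21(theta, lam):
    """Proximal operator of lam * ||theta||_{2,1} with rows as groups:
    shrink each row's Euclidean norm by lam, zeroing small groups."""
    norms = np.linalg.norm(theta, axis=1, keepdims=True)
    scale = np.maximum(0.0, 1.0 - lam / np.maximum(norms, 1e-12))
    return theta * scale
```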
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Bayesian networks with ancestral constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/144a3f71a03ab7c4f46f9656608efdb2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/144a3f71a03ab7c4f46f9656608efdb2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/144a3f71a03ab7c4f46f9656608efdb2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/144a3f71a03ab7c4f46f9656608efdb2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/144a3f71a03ab7c4f46f9656608efdb2-Reviews.html", "metareview": "", "pdf_size": 420765, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8080276688901644232&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095", "aff_domain": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "email": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/144a3f71a03ab7c4f46f9656608efdb2-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Bound for Parameter Transfer Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7396", "id": "7396", "author": "Wataru Kumagai", "abstract": "We consider a transfer-learning problem by using the parameter transfer approach, where a suitable parameter of feature mapping is learned through one task and applied to another objective task. Then, we introduce the notion of the local stability of parametric feature mapping and parameter transfer learnability, and thereby derive a learning bound for parameter transfer algorithms. As an application of parameter transfer learning, we discuss the performance of sparse coding in self-taught learning. Although self-taught learning algorithms with plentiful unlabeled data often show excellent empirical performance, their theoretical properties have not been studied. In this paper, we also provide the first theoretical learning bound for self-taught learning.", "bibtex": "@inproceedings{NIPS2016_7f53f8c6,\n author = {Kumagai, Wataru},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Bound for Parameter Transfer Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7f53f8c6c730af6aeb52e66eb74d8507-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7f53f8c6c730af6aeb52e66eb74d8507-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7f53f8c6c730af6aeb52e66eb74d8507-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7f53f8c6c730af6aeb52e66eb74d8507-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7f53f8c6c730af6aeb52e66eb74d8507-Reviews.html", "metareview": "", "pdf_size": 132382, "gs_citation": 20, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15756767272400319592&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7f53f8c6c730af6aeb52e66eb74d8507-Abstract.html" }, { "title": "Learning Deep Embeddings with Histogram Loss", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7358", "id": "7358", "author_site": "Evgeniya Ustinova, Victor Lempitsky", "author": "Evgeniya Ustinova; Victor Lempitsky", "abstract": "We suggest a new loss for learning deep embeddings. The key characteristics of the new loss are the absence of tunable parameters and the very good results obtained across a range of datasets and problems. The loss is computed by estimating two distributions of similarities for positive (matching) and negative (non-matching) point pairs, and then computing the probability that a positive pair has a lower similarity score than a negative pair, based on these probability estimates. We show that these operations can be performed in a simple and piecewise-differentiable manner using 1D histograms with soft assignment operations. This makes the proposed loss suitable for learning deep embeddings using stochastic optimization. The experiments reveal favourable results compared to recently proposed loss functions.", "bibtex": "@inproceedings{NIPS2016_325995af,\n author = {Ustinova, Evgeniya and Lempitsky, Victor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Deep Embeddings with Histogram Loss},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/325995af77a0e8b06d1204a171010b3a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/325995af77a0e8b06d1204a171010b3a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/325995af77a0e8b06d1204a171010b3a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/325995af77a0e8b06d1204a171010b3a-Reviews.html", "metareview": "", "pdf_size": 455098, "gs_citation": 467, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6994791028704777674&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 11, "aff": "Skolkovo Institute of Science and Technology (Skoltech); Skolkovo Institute of Science and Technology (Skoltech)", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/325995af77a0e8b06d1204a171010b3a-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Skolkovo Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.skoltech.ru", "aff_unique_abbr": "Skoltech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Russian Federation" }, { "title": "Learning Deep Parsimonious Representations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7202", "id": "7202", "author_site": "Renjie Liao, Alex Schwing, Richard Zemel, Raquel Urtasun", "author": "Renjie Liao; Alex Schwing; Richard Zemel; Raquel Urtasun", "abstract": "In this paper we aim at facilitating generalization for deep networks while supporting interpretability of the learned representations. Towards this goal, we propose a clustering based regularization that encourages parsimonious representations. Our k-means style objective is easy to optimize and flexible supporting various forms of clustering, including sample and spatial clustering as well as co-clustering. We demonstrate the effectiveness of our approach on the tasks of unsupervised learning, classification, fine grained categorization and zero-shot learning.", "bibtex": "@inproceedings{NIPS2016_a376033f,\n author = {Liao, Renjie and Schwing, Alex and Zemel, Richard and Urtasun, Raquel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
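The loss computation summarized in the "Learning Deep Embeddings with Histogram Loss" entry above admits a compact sketch. The code below is a minimal NumPy illustration, not the authors' implementation; the [-1, 1] similarity range (e.g., cosine similarities) and the number of histogram nodes are assumptions:

```python
import numpy as np

def histogram_loss(pos_sims, neg_sims, n_bins=100):
    """Estimate P(negative pair scores at least as high as positive pair)
    from soft-assigned 1D histograms of pair similarities."""
    nodes = np.linspace(-1.0, 1.0, n_bins)   # histogram node centres
    delta = nodes[1] - nodes[0]

    def soft_hist(sims):
        # Triangular (linear) soft assignment: each similarity spreads its
        # mass over the two nearest nodes, keeping the histogram
        # piecewise-differentiable in the similarities.
        w = np.maximum(0.0, 1.0 - np.abs(sims[None, :] - nodes[:, None]) / delta)
        h = w.sum(axis=1)
        return h / h.sum()

    h_pos = soft_hist(np.asarray(pos_sims, dtype=float))
    h_neg = soft_hist(np.asarray(neg_sims, dtype=float))
    # Sum over nodes of negative mass times the positive CDF up to that node.
    return float(np.sum(h_neg * np.cumsum(h_pos)))
```

Because the soft assignment is piecewise-linear in the similarities, the same computation stays differentiable when expressed in an autodiff framework, which is what makes it usable as a training loss.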
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Deep Parsimonious Representations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a376033f78e144f494bfc743c0be3330-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a376033f78e144f494bfc743c0be3330-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a376033f78e144f494bfc743c0be3330-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a376033f78e144f494bfc743c0be3330-Reviews.html", "metareview": "", "pdf_size": 3827032, "gs_citation": 115, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=405047252171300685&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Toronto1; University of Illinois at Urbana-Champaign2; University of Toronto1 + Canadian Institute for Advanced Research3; University of Toronto1", "aff_domain": "cs.toronto.edu;illinois.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;illinois.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a376033f78e144f494bfc743c0be3330-Abstract.html", "aff_unique_index": "0;1;0+2;0", "aff_unique_norm": "University of Toronto;University of Illinois Urbana-Champaign;Canadian Institute for Advanced Research", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://illinois.edu;https://www.cifar.ca", "aff_unique_abbr": "U of T;UIUC;CIFAR", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;1;0+0;0", "aff_country_unique": "Canada;United States" }, { "title": "Learning HMMs with Nonparametric Emissions via Spectral Decompositions of Continuous Matrices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7187", "id": "7187", "author_site": "Kirthevasan Kandasamy, Maruan Al-Shedivat, Eric Xing", "author": "Kirthevasan Kandasamy; Maruan Al-Shedivat; Eric P Xing", "abstract": "Recently, there has been a surge of interest in using spectral methods for estimating latent variable models. However, it is usually assumed that the distribution of the observations conditioned on the latent variables is either discrete or belongs to a parametric family. In this paper, we study the estimation of an $m$-state hidden Markov model (HMM) with only smoothness assumptions, such as H\\\"olderian conditions, on the emission densities. By leveraging some recent advances in continuous linear algebra and numerical analysis, we develop a computationally efficient spectral algorithm for learning nonparametric HMMs. Our technique is based on computing an SVD on nonparametric estimates of density functions by viewing them as \\emph{continuous matrices}. We derive sample complexity bounds via concentration results for nonparametric density estimation and novel perturbation theory results for continuous matrices. We implement our method using Chebyshev polynomial approximations. Our method is competitive with other baselines on synthetic and real problems and is also very computationally efficient.", "bibtex": "@inproceedings{NIPS2016_afe43465,\n author = {Kandasamy, Kirthevasan and Al-Shedivat, Maruan and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
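A rough picture of the clustering-based regularizer from the "Learning Deep Parsimonious Representations" entry above: penalize each representation's squared distance to its nearest centre. The sketch covers only the sample-clustering case (the spatial and co-clustering variants, and the centre updates, are omitted), and all names are ours:

```python
import numpy as np

def sample_clustering_penalty(acts, centers):
    """Mean squared distance from each activation (batch, dim) to its
    nearest centre (k, dim); added to the task loss, this k-means style
    term encourages parsimonious representations."""
    d2 = ((acts[:, None, :] - centers[None, :, :]) ** 2).sum(axis=-1)
    assign = d2.argmin(axis=1)                 # hard cluster assignment
    return d2[np.arange(acts.shape[0]), assign].mean(), assign
```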
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning HMMs with Nonparametric Emissions via Spectral Decompositions of Continuous Matrices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/afe434653a898da20044041262b3ac74-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/afe434653a898da20044041262b3ac74-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/afe434653a898da20044041262b3ac74-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/afe434653a898da20044041262b3ac74-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/afe434653a898da20044041262b3ac74-Reviews.html", "metareview": "", "pdf_size": 836380, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10034469484719865144&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Carnegie Mellon University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/afe434653a898da20044041262b3ac74-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Infinite RBMs with Frank-Wolfe", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6927", "id": "6927", "author_site": "Wei Ping, Qiang Liu, Alexander Ihler", "author": "Wei Ping; Qiang Liu; Alex Ihler", "abstract": "In this work, we propose an infinite restricted Boltzmann machine (RBM), whose maximum likelihood estimation (MLE) corresponds to a constrained convex optimization. We consider the Frank-Wolfe algorithm to solve the program, which provides a sparse solution that can be interpreted as inserting a hidden unit at each iteration, so that the optimization process takes the form of a sequence of finite models of increasing complexity. As a side benefit, this can be used to easily and efficiently identify an appropriate number of hidden units during the optimization. The resulting model can also be used as an initialization for typical state-of-the-art RBM training algorithms such as contrastive divergence, leading to models with consistently higher test likelihood than random initialization.", "bibtex": "@inproceedings{NIPS2016_df12ecd0,\n author = {Ping, Wei and Liu, Qiang and Ihler, Alexander T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Infinite RBMs with Frank-Wolfe},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/df12ecd077efc8c23881028604dbb8cc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/df12ecd077efc8c23881028604dbb8cc-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/df12ecd077efc8c23881028604dbb8cc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/df12ecd077efc8c23881028604dbb8cc-Reviews.html", "metareview": "", "pdf_size": 388305, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16668160135052757821&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Computer Science, UC Irvine; Computer Science, Dartmouth College; Computer Science, UC Irvine", "aff_domain": "ics.uci.edu;cs.dartmouth.edu;ics.uci.edu", "email": "ics.uci.edu;cs.dartmouth.edu;ics.uci.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/df12ecd077efc8c23881028604dbb8cc-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Irvine;Dartmouth College", "aff_unique_dep": "Department of Computer Science;Computer Science", "aff_unique_url": "https://www.uci.edu;https://dartmouth.edu", "aff_unique_abbr": "UCI;Dartmouth", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Influence Functions from Incomplete Observations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7361", "id": "7361", "author_site": "Xinran He, Ke Xu, David Kempe, Yan Liu", "author": "Xinran He; Ke Xu; David Kempe; Yan Liu", "abstract": "We study the problem of learning influence functions under incomplete observations of node activations. Incomplete observations are a major concern as most (online and real-world) social networks are not fully observable. We establish both proper and improper PAC learnability of influence functions under randomly missing observations. Proper PAC learnability under the Discrete-Time Linear Threshold (DLT) and Discrete-Time Independent Cascade (DIC) models is established by reducing incomplete observations to complete observations in a modified graph. Our improper PAC learnability result applies for the DLT and DIC models as well as the Continuous-Time Independent Cascade (CIC) model. It is based on a parametrization in terms of reachability features, and also gives rise to an efficient and practical heuristic. Experiments on synthetic and real-world datasets demonstrate the ability of our method to compensate even for a fairly large fraction of missing observations.", "bibtex": "@inproceedings{NIPS2016_68b1fbe7,\n author = {He, Xinran and Xu, Ke and Kempe, David and Liu, Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
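The structural idea in the "Learning Infinite RBMs with Frank-Wolfe" entry above, one hidden unit inserted per iteration, comes directly from the shape of the Frank-Wolfe loop. The skeleton below is the generic algorithm, not the paper's RBM likelihood program; `grad` and `lmo` (the linear minimization oracle) are assumed callables supplied by the problem:

```python
import numpy as np

def frank_wolfe(grad, lmo, x0, n_iters=50):
    """Generic Frank-Wolfe: after t rounds the iterate is a convex
    combination of at most t atoms, so each round can be read as
    'insert one new hidden unit'."""
    x = np.asarray(x0, dtype=float)
    for t in range(n_iters):
        s = lmo(grad(x))            # atom picked this round
        gamma = 2.0 / (t + 2.0)     # standard step-size schedule
        x = (1.0 - gamma) * x + gamma * s
    return x
```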
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Influence Functions from Incomplete Observations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/68b1fbe7f16e4ae3024973f12f3cb313-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/68b1fbe7f16e4ae3024973f12f3cb313-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/68b1fbe7f16e4ae3024973f12f3cb313-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/68b1fbe7f16e4ae3024973f12f3cb313-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/68b1fbe7f16e4ae3024973f12f3cb313-Reviews.html", "metareview": "", "pdf_size": 595396, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11698454122083218219&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Southern California, Los Angeles, CA 90089; University of Southern California, Los Angeles, CA 90089; University of Southern California, Los Angeles, CA 90089; University of Southern California, Los Angeles, CA 90089", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu", "email": "usc.edu;usc.edu;usc.edu;usc.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/68b1fbe7f16e4ae3024973f12f3cb313-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Kernels with Random Features", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6979", "id": "6979", "author_site": "Aman Sinha, John Duchi", "author": "Aman Sinha; John C. Duchi", "abstract": "Randomized features provide a computationally efficient way to approximate kernel machines in machine learning tasks. However, such methods require a user-defined kernel as input. We extend the randomized-feature approach to the task of learning a kernel (via its associated random features). Specifically, we present an efficient optimization problem that learns a kernel in a supervised manner. We prove the consistency of the estimated kernel as well as generalization bounds for the class of estimators induced by the optimized kernel, and we experimentally evaluate our technique on several datasets. Our approach is efficient and highly scalable, and we attain competitive results with a fraction of the training cost of other techniques.", "bibtex": "@inproceedings{NIPS2016_e7061188,\n author = {Sinha, Aman and Duchi, John C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Kernels with Random Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e70611883d2760c8bbafb4acb29e3446-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e70611883d2760c8bbafb4acb29e3446-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e70611883d2760c8bbafb4acb29e3446-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e70611883d2760c8bbafb4acb29e3446-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e70611883d2760c8bbafb4acb29e3446-Reviews.html", "metareview": "", "pdf_size": 829220, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3239120243846912079&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Departments of Electrical Engineering; Departments of Electrical Engineering and Statistics", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e70611883d2760c8bbafb4acb29e3446-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Electrical Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Multiagent Communication with Backpropagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7180", "id": "7180", "author_site": "Sainbayar Sukhbaatar, arthur szlam, Rob Fergus", "author": "Sainbayar Sukhbaatar; arthur szlam; Rob Fergus", "abstract": "Many tasks in AI require the collaboration of multiple agents. Typically, the communication protocol between agents is manually specified and not altered during training. In this paper we explore a simple neural model, called CommNet, that uses continuous communication for fully cooperative tasks. The model consists of multiple agents and the communication between them is learned alongside their policy. We apply this model to a diverse set of tasks, demonstrating the ability of the agents to learn to communicate amongst themselves, yielding improved performance over non-communicative agents and baselines. In some cases, it is possible to interpret the language devised by the agents, revealing simple but effective strategies for solving the task at hand.", "bibtex": "@inproceedings{NIPS2016_55b1927f,\n author = {Sukhbaatar, Sainbayar and szlam, arthur and Fergus, Rob},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
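To make the "learning a kernel via its random features" idea of the "Learning Kernels with Random Features" entry above concrete, here is a deliberately simplified sketch: draw a large pool of random Fourier features and keep a label-aligned subset, standing in for the paper's supervised optimization over the feature distribution. The pool size, bandwidth, and scoring rule are all assumptions:

```python
import numpy as np

def reweighted_random_features(X, y, n_pool=500, n_keep=100, sigma=1.0):
    """Draw a pool of random Fourier features, score each by its
    alignment with the labels, and keep the best-scoring subset."""
    rng = np.random.default_rng(0)
    W = rng.standard_normal((n_pool, X.shape[1])) / sigma
    b = rng.uniform(0.0, 2.0 * np.pi, n_pool)
    phi = np.sqrt(2.0 / n_pool) * np.cos(X @ W.T + b)   # (n, n_pool)
    scores = np.abs(phi.T @ y)                          # label alignment
    keep = np.argsort(-scores)[:n_keep]
    return phi[:, keep], W[keep], b[keep]
```

A linear model trained on the kept features then behaves like a kernel machine whose kernel was tuned to the task.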
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Multiagent Communication with Backpropagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/55b1927fdafef39c48e5b73b5d61ea60-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/55b1927fdafef39c48e5b73b5d61ea60-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/55b1927fdafef39c48e5b73b5d61ea60-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/55b1927fdafef39c48e5b73b5d61ea60-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/55b1927fdafef39c48e5b73b5d61ea60-Reviews.html", "metareview": "", "pdf_size": 3890027, "gs_citation": 1477, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5233243549252503864&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Dept. of Computer Science, Courant Institute, New York University; Facebook AI Research, New York; Facebook AI Research, New York", "aff_domain": "cs.nyu.edu;fb.com;fb.com", "email": "cs.nyu.edu;fb.com;fb.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/55b1927fdafef39c48e5b73b5d61ea60-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Meta", "aff_unique_dep": "Dept. of Computer Science;Facebook AI Research", "aff_unique_url": "https://www.nyu.edu;https://research.facebook.com", "aff_unique_abbr": "NYU;FAIR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Parametric Sparse Models for Image Super-Resolution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7003", "id": "7003", "author_site": "Yongbo Li, Weisheng Dong, Xuemei Xie, GUANGMING Shi, Xin Li, Donglai Xu", "author": "Yongbo Li; Weisheng Dong; Xuemei Xie; GUANGMING Shi; Xin Li; Donglai Xu", "abstract": "Learning accurate prior knowledge of natural images is of great importance for single image super-resolution (SR). Existing SR methods either learn the prior from the low/high-resolution patch pairs or estimate the prior models from the input low-resolution (LR) image. Specifically, the former methods learn high-frequency details; though effective, they are heuristic and have limitations in dealing with blurred LR images, while the latter suffers from the limitations of frequency aliasing. In this paper, we propose to combine those two lines of ideas for image super-resolution. More specifically, the parametric sparse prior of the desirable high-resolution (HR) image patches is learned from both the input low-resolution (LR) image and a training image dataset. With the learned sparse priors, the sparse codes and thus the HR image patches can be accurately recovered by solving a sparse coding problem. Experimental results show that the proposed SR method outperforms existing state-of-the-art methods in terms of both subjective and objective image quality.", "bibtex": "@inproceedings{NIPS2016_619205da,\n author = {Li, Yongbo and Dong, Weisheng and Xie, Xuemei and Shi, GUANGMING and Li, Xin and Xu, Donglai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
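The continuous-communication step that CommNet learns (see the "Learning Multiagent Communication with Backpropagation" entry above) can be written in a few lines. This is a single-round sketch with assumed weight shapes, not the full multi-round model:

```python
import numpy as np

def commnet_round(H, W_self, W_comm):
    """One communication step: each agent mixes its own hidden state
    (rows of H) with the mean hidden state of the other agents; the whole
    step is differentiable, so the protocol is trained by backprop."""
    n = H.shape[0]
    comm = (H.sum(axis=0, keepdims=True) - H) / max(n - 1, 1)
    return np.tanh(H @ W_self + comm @ W_comm)
```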
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Parametric Sparse Models for Image Super-Resolution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/619205da514e83f869515c782a328d3c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/619205da514e83f869515c782a328d3c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/619205da514e83f869515c782a328d3c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/619205da514e83f869515c782a328d3c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/619205da514e83f869515c782a328d3c-Reviews.html", "metareview": "", "pdf_size": 2459152, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16137383848573389484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "State Key Lab. of ISN, School of Electronic Engineering, Xidian University, China; State Key Lab. of ISN, School of Electronic Engineering, Xidian University, China + Key Lab. of IPIU (Chinese Ministry of Education), Xidian University, China; State Key Lab. of ISN, School of Electronic Engineering, Xidian University, China; Key Lab. of IPIU (Chinese Ministry of Education), Xidian University, China + Lane Dep. of CSEE, West Virginia University, USA; Lane Dep. of CSEE, West Virginia University, USA; Sch. of Sci. and Eng., Teesside University, UK", "aff_domain": "stu.xidian.edu.cn;mail.xidian.edu.cn;mail.xidian.edu.cn;xidian.edu.cn;mail.wvu.edu; ", "email": "stu.xidian.edu.cn;mail.xidian.edu.cn;mail.xidian.edu.cn;xidian.edu.cn;mail.wvu.edu; ", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/619205da514e83f869515c782a328d3c-Abstract.html", "aff_unique_index": "0;0+0;0;0+1;1;2", "aff_unique_norm": "Xidian University;West Virginia University;Teesside University", "aff_unique_dep": "School of Electronic Engineering;Lane Department of Computer Science and Electrical Engineering;School of Science and Engineering", "aff_unique_url": "http://www.xidian.edu.cn/;https://www.wvu.edu;https://www.teesside.ac.uk", "aff_unique_abbr": "Xidian;WVU;Teesside", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0+0;0;0+1;1;2", "aff_country_unique": "China;United States;United Kingdom" }, { "title": "Learning Sensor Multiplexing Design through Back-propagation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7369", "id": "7369", "author": "Ayan Chakrabarti", "abstract": "Recent progress on many imaging and vision tasks has been driven by the use of deep feed-forward neural networks, which are trained by propagating gradients of a loss defined on the final output, back through the network up to the first layer that operates directly on the image. We propose back-propagating one step further---to learn camera sensor designs jointly with networks that carry out inference on the images they capture. In this paper, we specifically consider the design and inference problems in a typical color camera---where the sensor is able to measure only one color channel at each pixel location, and computational inference is required to reconstruct a full color image. We learn the camera sensor's color multiplexing pattern by encoding it as a layer whose learnable weights determine which color channel, from among a fixed set, will be measured at each location. 
These weights are jointly trained with those of a reconstruction network that operates on the corresponding sensor measurements to produce a full color image. Our network achieves significant improvements in accuracy over the traditional Bayer pattern used in most color cameras. It automatically learns to employ a sparse color measurement approach similar to that of a recent design, and moreover, improves upon that design by learning an optimal layout for these measurements.", "bibtex": "@inproceedings{NIPS2016_aa486f25,\n author = {Chakrabarti, Ayan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Sensor Multiplexing Design through Back-propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/aa486f25175cbdc3854151288a645c19-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/aa486f25175cbdc3854151288a645c19-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/aa486f25175cbdc3854151288a645c19-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/aa486f25175cbdc3854151288a645c19-Reviews.html", "metareview": "", "pdf_size": 2201921, "gs_citation": 169, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17630358203373859533&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Toyota Technological Institute at Chicago", "aff_domain": "ttic.edu", "email": "ttic.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/aa486f25175cbdc3854151288a645c19-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Toyota Technological Institute at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.tti-chicago.org", "aff_unique_abbr": "TTI Chicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Learning Sparse Gaussian Graphical Models with Overlapping Blocks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7372", "id": "7372", "author_site": "Mohammad Javad Hosseini, Su-In Lee", "author": "Mohammad Javad Hosseini; Su-In Lee", "abstract": "We present a novel framework, called GRAB (GRaphical models with overlApping Blocks), to capture densely connected components in a network estimate. GRAB takes as input a data matrix of p variables and n samples, and jointly learns both a network among p variables and densely connected groups of variables (called `blocks'). GRAB has four major novelties as compared to existing network estimation methods: 1) It does not require the blocks to be given a priori. 2) Blocks can overlap. 3) It can jointly learn a network structure and overlapping blocks. 4) It solves a joint optimization problem with the block coordinate descent method that is convex in each step. We show that GRAB reveals the underlying network structure substantially better than four state-of-the-art competitors on synthetic data. 
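The learnable-mosaic layer described in the "Learning Sensor Multiplexing Design" entry above is, at its core, a per-pixel softmax over channel-selection weights. A minimal sketch follows; the temperature parameter and names are assumptions, and the actual paper trains such a layer jointly with a reconstruction network:

```python
import numpy as np

def soft_mosaic(img, logits, tau=1.0):
    """Per-pixel softmax over colour channels: the sensor 'measures' a
    convex combination of channels that approaches a hard
    one-channel-per-pixel mosaic as tau shrinks.  img, logits: (H, W, C)."""
    z = logits / tau
    p = np.exp(z - z.max(axis=-1, keepdims=True))
    p /= p.sum(axis=-1, keepdims=True)
    return (img * p).sum(axis=-1)        # (H, W) one measurement per pixel
```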
When applied to cancer gene expression data, GRAB outperforms its competitors in revealing known functional gene sets and potentially novel genes that drive cancer.", "bibtex": "@inproceedings{NIPS2016_6be5336d,\n author = {Hosseini, Mohammad Javad and Lee, Su-In},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Sparse Gaussian Graphical Models with Overlapping Blocks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6be5336db2c119736cf48f475e051bfe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6be5336db2c119736cf48f475e051bfe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6be5336db2c119736cf48f475e051bfe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6be5336db2c119736cf48f475e051bfe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6be5336db2c119736cf48f475e051bfe-Reviews.html", "metareview": "", "pdf_size": 1276742, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12408927136658699182&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Computer Science & Engineering, University of Washington, Seattle + Department of Genome Sciences, University of Washington, Seattle; Department of Computer Science & Engineering, University of Washington, Seattle + Department of Genome Sciences, University of Washington, Seattle", "aff_domain": "cs.washington.edu;cs.washington.edu", "email": "cs.washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6be5336db2c119736cf48f475e051bfe-Abstract.html", "aff_unique_index": "0+0;0+0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Department of Computer Science & Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0+0;0+0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0+0;0+0", "aff_country_unique": "United States" }, { "title": "Learning Structured Sparsity in Deep Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7205", "id": "7205", "author_site": "Wei Wen, Chunpeng Wu, Yandan Wang, Yiran Chen, Hai Li", "author": "Wei Wen; Chunpeng Wu; Yandan Wang; Yiran Chen; Hai Li", "abstract": "High demand for computation resources severely hinders deployment of large-scale Deep Neural Networks (DNN) in resource constrained devices. In this work, we propose a Structured Sparsity Learning (SSL) method to regularize the structures (i.e., filters, channels, filter shapes, and layer depth) of DNNs. SSL can: (1) learn a compact structure from a bigger DNN to reduce computation cost; (2) obtain a hardware-friendly structured sparsity of DNN to efficiently accelerate the DNN\u2019s evaluation. Experimental results show that SSL achieves on average 5.1X and 3.1X speedups of convolutional layer computation of AlexNet against CPU and GPU, respectively, with off-the-shelf libraries. These speedups are about twice those of non-structured sparsity; (3) regularize the DNN structure to improve classification accuracy. 
The results show that for CIFAR-10, regularization on layer depth reduces a 20-layer Deep Residual Network (ResNet) to 18 layers while improving the accuracy from 91.25% to 92.60%, which is still higher than that of the original ResNet with 32 layers. For AlexNet, SSL reduces the error by ~1%.", "bibtex": "@inproceedings{NIPS2016_41bfd20a,\n author = {Wen, Wei and Wu, Chunpeng and Wang, Yandan and Chen, Yiran and Li, Hai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Structured Sparsity in Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/41bfd20a38bb1b0bec75acf0845530a7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/41bfd20a38bb1b0bec75acf0845530a7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/41bfd20a38bb1b0bec75acf0845530a7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/41bfd20a38bb1b0bec75acf0845530a7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/41bfd20a38bb1b0bec75acf0845530a7-Reviews.html", "metareview": "", "pdf_size": 1015427, "gs_citation": 3145, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14407046964194758297&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "University of Pittsburgh; University of Pittsburgh; University of Pittsburgh; University of Pittsburgh; University of Pittsburgh", "aff_domain": "pitt.edu;pitt.edu;pitt.edu;pitt.edu;pitt.edu", "email": "pitt.edu;pitt.edu;pitt.edu;pitt.edu;pitt.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/41bfd20a38bb1b0bec75acf0845530a7-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Pittsburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.pitt.edu", "aff_unique_abbr": "Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Supervised PageRank with Gradient-Based and Gradient-Free Optimization Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8507", "id": "8507", "author_site": "Lev Bogolubsky, Pavel Dvurechenskii, Alexander Gasnikov, Gleb Gusev, Yurii Nesterov, Andrei M Raigorodskii, Aleksey Tikhonov, Maksim Zhukovskii", "author": "Lev Bogolubsky; Pavel Dvurechenskii; Alexander Gasnikov; Gleb Gusev; Yurii Nesterov; Andrei M Raigorodskii; Aleksey Tikhonov; Maksim Zhukovskii", "abstract": "In this paper, we consider a non-convex loss-minimization problem of learning Supervised PageRank models, which can account for features of nodes and edges. We propose gradient-based and random gradient-free methods to solve this problem. Our algorithms are based on the concept of an inexact oracle and, unlike the state-of-the-art gradient-based method, we provide theoretical convergence rate guarantees for both of them. 
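The filter-level case of the structured-sparsity regularizer in the "Learning Structured Sparsity in Deep Neural Networks" entry above is an ordinary group lasso with one group per output filter. A sketch, with the weight layout and coefficient as assumptions:

```python
import numpy as np

def filter_group_lasso(conv_w, lam=1e-4):
    """Group-lasso penalty over conv filters, conv_w shaped
    (out_channels, in_channels, kh, kw): driving a whole group's norm to
    zero removes the filter entirely, which hardware can then skip."""
    per_filter = np.sqrt((conv_w ** 2).sum(axis=(1, 2, 3)))
    return lam * per_filter.sum()
```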
Finally, we compare the performance of the proposed optimization methods with the state of the art applied to a ranking task.", "bibtex": "@inproceedings{NIPS2016_1f34004e,\n author = {Bogolubsky, Lev and Dvurechenskii, Pavel and Gasnikov, Alexander and Gusev, Gleb and Nesterov, Yurii and Raigorodskii, Andrei M and Tikhonov, Aleksey and Zhukovskii, Maksim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Supervised PageRank with Gradient-Based and Gradient-Free Optimization Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1f34004ebcb05f9acda6016d5cc52d5e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1f34004ebcb05f9acda6016d5cc52d5e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1f34004ebcb05f9acda6016d5cc52d5e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1f34004ebcb05f9acda6016d5cc52d5e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1f34004ebcb05f9acda6016d5cc52d5e-Reviews.html", "metareview": "", "pdf_size": 395380, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13753142414804058149&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Yandex+Moscow State University+Buryat State University; Yandex+Moscow State University+Buryat State University; Yandex+Moscow State University+Buryat State University; Yandex; Yandex+Moscow State University+Buryat State University; Weierstrass Institute+Institute for Information Transmission Problems RAS+Moscow Institute of Physics and Technology; Institute for Information Transmission Problems RAS+Moscow Institute of Physics and Technology; Center for Operations Research and Econometrics+Higher School of Economics", "aff_domain": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;wias-berlin.de;yandex.ru;uclouvain.be", "email": "yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;yandex-team.ru;wias-berlin.de;yandex.ru;uclouvain.be", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1f34004ebcb05f9acda6016d5cc52d5e-Abstract.html", "aff_unique_index": "0+1+2;0+1+2;0+1+2;0;0+1+2;3+4+5;4+5;6+7", "aff_unique_norm": "Yandex;Moscow State University;Buryat State University;Weierstrass Institute for Applied Analysis and Stochastics;Institute for Information Transmission Problems;Moscow Institute of Physics and Technology;Center for Operations Research and Econometrics;Higher School of Economics", "aff_unique_dep": ";;;;;;Operations Research and Econometrics;", "aff_unique_url": "https://yandex.com;https://www.msu.ru;http://www.bsu.ru;https://www.wias-berlin.de/;http://www.iitp.ru;https://www.mipt.ru/en;https://core.ucl.ac.be;https://www.hse.ru", "aff_unique_abbr": "Yandex;MSU;;WIAS;IITP RAS;MIPT;CORE;HSE", "aff_campus_unique_index": ";;;;;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0+0;0+0+0;0+0+0;0;0+0+0;1+0+0;0+0;2+0", "aff_country_unique": "Russian Federation;Germany;Belgium" }, { "title": "Learning Transferrable Representations for Unsupervised Domain Adaptation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6892", "id": "6892", "author_site": "Ozan Sener, Hyun Oh Song, Ashutosh Saxena, Silvio Savarese", "author": "Ozan Sener; 
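The "random gradient-free" oracle family referenced in the Supervised PageRank entry above is typically a two-point finite difference along a random direction. A generic sketch follows (step size and names are assumptions, and this is the textbook estimator rather than the paper's exact scheme):

```python
import numpy as np

def gradient_free_oracle(f, x, mu=1e-4, rng=None):
    """Two-point estimate (f(x + mu*u) - f(x)) / mu * u with Gaussian u:
    an unbiased gradient estimate of a smoothed version of f, usable when
    the loss (here, a ranking loss through PageRank) has no cheap exact
    gradient."""
    rng = rng or np.random.default_rng()
    u = rng.standard_normal(x.shape)
    return (f(x + mu * u) - f(x)) / mu * u
```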
Hyun Oh Song; Ashutosh Saxena; Silvio Savarese", "abstract": "Supervised learning with large scale labelled datasets and deep layered models has caused a paradigm shift in diverse areas in learning and recognition. However, this approach still suffers from generalization issues under the presence of a domain shift between the training and the test data distribution. Since unsupervised domain adaptation algorithms directly address this domain shift problem between a labelled source dataset and an unlabelled target dataset, recent papers have shown promising results by fine-tuning the networks with domain adaptation loss functions which try to align the mismatch between the training and testing data distributions. Nevertheless, these recent deep learning based domain adaptation approaches still suffer from issues such as high sensitivity to the gradient reversal hyperparameters and overfitting during the fine-tuning stage. In this paper, we propose a unified deep learning framework where the representation, cross domain transformation, and target label inference are all jointly optimized in an end-to-end fashion for unsupervised domain adaptation. Our experiments show that the proposed method significantly outperforms state-of-the-art algorithms in both object recognition and digit classification experiments by a large margin. We will make our learned models as well as the source code available immediately upon acceptance.", "bibtex": "@inproceedings{NIPS2016_b59c67bf,\n author = {Sener, Ozan and Song, Hyun Oh and Saxena, Ashutosh and Savarese, Silvio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Transferrable Representations for Unsupervised Domain Adaptation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b59c67bf196a4758191e42f76670ceba-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b59c67bf196a4758191e42f76670ceba-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b59c67bf196a4758191e42f76670ceba-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b59c67bf196a4758191e42f76670ceba-Reviews.html", "metareview": "", "pdf_size": 18099519, "gs_citation": 345, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13960209037512838294&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Stanford University; Stanford University; Brain of Things; Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu;cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b59c67bf196a4758191e42f76670ceba-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Stanford University;Brain Of Things", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.brainofthings.com", "aff_unique_abbr": "Stanford;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning Tree Structured Potential Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6906", "id": "6906", "author_site": "Vikas Garg, Tommi Jaakkola", "author": "Vikas Garg; Tommi 
Jaakkola", "abstract": "Many real phenomena, including behaviors, involve strategic interactions that can be learned from data. We focus on learning tree structured potential games where equilibria are represented by local maxima of an underlying potential function. We cast the learning problem within a max margin setting and show that the problem is NP-hard even when the strategic interactions form a tree. We develop a variant of dual decomposition to estimate the underlying game and demonstrate with synthetic and real decision/voting data that the game theoretic perspective (carving out local maxima) enables meaningful recovery.", "bibtex": "@inproceedings{NIPS2016_22ac3c5a,\n author = {Garg, Vikas and Jaakkola, Tommi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Tree Structured Potential Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/22ac3c5a5bf0b520d281c122d1490650-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/22ac3c5a5bf0b520d281c122d1490650-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/22ac3c5a5bf0b520d281c122d1490650-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/22ac3c5a5bf0b520d281c122d1490650-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/22ac3c5a5bf0b520d281c122d1490650-Reviews.html", "metareview": "", "pdf_size": 383163, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17511240302147641269&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "CSAIL, MIT; CSAIL, MIT", "aff_domain": "csail.mit.edu;csail.mit.edu", "email": "csail.mit.edu;csail.mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/22ac3c5a5bf0b520d281c122d1490650-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning Treewidth-Bounded Bayesian Networks with Thousands of Variables", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7113", "id": "7113", "author_site": "Mauro Scanagatta, Giorgio Corani, Cassio de Campos, Marco Zaffalon", "author": "Mauro Scanagatta; Giorgio Corani; Cassio P de Campos; Marco Zaffalon", "abstract": "We present a method for learning treewidth-bounded Bayesian networks from data sets containing thousands of variables. Bounding the treewidth of a Bayesian network greatly reduces the complexity of inferences. Yet, being a global property of the graph, it considerably increases the difficulty of the learning process. Our novel algorithm accomplishes this task, scaling both to large domains and to large treewidths. Our novel approach consistently outperforms the state of the art on experiments with up to thousands of variables.", "bibtex": "@inproceedings{NIPS2016_e2a2dcc3,\n author = {Scanagatta, Mauro and Corani, Giorgio and de Campos, Cassio P and Zaffalon, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning Treewidth-Bounded Bayesian Networks with Thousands of Variables},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e2a2dcc36a08a345332c751b2f2e476c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e2a2dcc36a08a345332c751b2f2e476c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e2a2dcc36a08a345332c751b2f2e476c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e2a2dcc36a08a345332c751b2f2e476c-Reviews.html", "metareview": "", "pdf_size": 264222, "gs_citation": 56, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2031561577206881147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "IDSIA\u2217, SUPSI\u2020, USI\u2021; IDSIA\u2217, SUPSI\u2020, USI\u2021; Queen\u2019s University Belfast; IDSIA\u2217", "aff_domain": "idsia.ch;idsia.ch;qub.ac.uk;idsia.ch", "email": "idsia.ch;idsia.ch;qub.ac.uk;idsia.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e2a2dcc36a08a345332c751b2f2e476c-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "IDSIA;Queen's University Belfast;Institute of Digital Science and Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.idsia.ch;https://www.qub.ac.uk;https://www.idsia.ch/", "aff_unique_abbr": "IDSIA;QUB;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "title": "Learning User Perceived Clusters with Feature-Level Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7139", "id": "7139", "author_site": "Ting-Yu Cheng, Guiguan Lin, xinyang gong, Kang-Jun Liu, Shan-Hung (Brandon) Wu", "author": "Ting-Yu Cheng; Guiguan Lin; xinyang gong; Kang-Jun Liu; Shan-Hung (Brandon) Wu", "abstract": "Semi-supervised clustering algorithms have been proposed to identify data clusters that align with user perceived ones via the aid of side information such as seeds or pairwise constraints. However, traditional side information is mostly at the instance level and subject to the sampling bias, where non-randomly sampled instances in the supervision can mislead the algorithms to wrong clusters. In this paper, we propose learning from the feature-level supervision. We show that this kind of supervision can be easily obtained in the form of perception vectors in many applications. Then we present novel algorithms, called Perception Embedded (PE) clustering, that exploit the perception vectors as well as traditional side information to find clusters perceived by the user. Extensive experiments are conducted on real datasets and the results demonstrate the effectiveness of PE empirically.", "bibtex": "@inproceedings{NIPS2016_9c838d2e,\n author = {Cheng, Ting-Yu and Lin, Guiguan and gong, xinyang and Liu, Kang-Jun and Wu, Shan-Hung (Brandon)},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning User Perceived Clusters with Feature-Level Supervision},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9c838d2e45b2ad1094d42f4ef36764f6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9c838d2e45b2ad1094d42f4ef36764f6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9c838d2e45b2ad1094d42f4ef36764f6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9c838d2e45b2ad1094d42f4ef36764f6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9c838d2e45b2ad1094d42f4ef36764f6-Reviews.html", "metareview": "", "pdf_size": 4850604, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=736321408221569907&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9c838d2e45b2ad1094d42f4ef36764f6-Abstract.html" }, { "title": "Learning What and Where to Draw", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7407", "id": "7407", "author_site": "Scott E Reed, Zeynep Akata, Santosh Mohan, Samuel Tenka, Bernt Schiele, Honglak Lee", "author": "Scott E Reed; Zeynep Akata; Santosh Mohan; Samuel Tenka; Bernt Schiele; Honglak Lee", "abstract": "Generative Adversarial Networks (GANs) have recently demonstrated the capability to synthesize compelling real-world images, such as room interiors, album covers, manga, faces, birds, and flowers. While existing models can synthesize images based on global constraints such as a class label or caption, they do not provide control over pose or object location. We propose a new model, the Generative Adversarial What-Where Network (GAWWN), that synthesizes images given instructions describing what content to draw in which location. We show high-quality 128 \u00d7 128 image synthesis on the Caltech-UCSD Birds dataset, conditioned on both informal text descriptions and also object location. Our system exposes control over both the bounding box around the bird and its constituent parts. By modeling the conditional distributions over part locations, our system also enables conditioning on arbitrary subsets of parts (e.g. only the beak and tail), yielding an efficient interface for picking part locations.", "bibtex": "@inproceedings{NIPS2016_a8f15eda,\n author = {Reed, Scott E and Akata, Zeynep and Mohan, Santosh and Tenka, Samuel and Schiele, Bernt and Lee, Honglak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning What and Where to Draw},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a8f15eda80c50adb0e71943adc8015cf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a8f15eda80c50adb0e71943adc8015cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a8f15eda80c50adb0e71943adc8015cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a8f15eda80c50adb0e71943adc8015cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a8f15eda80c50adb0e71943adc8015cf-Reviews.html", "metareview": "", "pdf_size": 5540188, "gs_citation": 959, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2100133886684978488&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "University of Michigan, Ann Arbor, USA+DeepMind; Max Planck Institute for Informatics, Saarbr\u00fccken, Germany; University of Michigan, Ann Arbor, USA; University of Michigan, Ann Arbor, USA; Max Planck Institute for Informatics, Saarbr\u00fccken, Germany; University of Michigan, Ann Arbor, USA", "aff_domain": "google.com;mpi-inf.mpg.de;umich.edu;umich.edu;mpi-inf.mpg.de;umich.edu", "email": "google.com;mpi-inf.mpg.de;umich.edu;umich.edu;mpi-inf.mpg.de;umich.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a8f15eda80c50adb0e71943adc8015cf-Abstract.html", "aff_unique_index": "0+1;2;0;0;2;0", "aff_unique_norm": "University of Michigan;DeepMind;Max Planck Institute for Informatics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://deepmind.com;https://mpi-inf.mpg.de", "aff_unique_abbr": "UM;DeepMind;MPII", "aff_campus_unique_index": "0;2;0;0;2;0", "aff_campus_unique": "Ann Arbor;;Saarbr\u00fccken", "aff_country_unique_index": "0+1;2;0;0;2;0", "aff_country_unique": "United States;United Kingdom;Germany" }, { "title": "Learning a Metric Embedding for Face Recognition using the Multibatch Method", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6958", "id": "6958", "author_site": "Oren Tadmor, Tal Rosenwein, Shai Shalev-Shwartz, Yonatan Wexler, Amnon Shashua", "author": "Oren Tadmor; Tal Rosenwein; Shai Shalev-Shwartz; Yonatan Wexler; Amnon Shashua", "abstract": "This work is motivated by the engineering task of achieving near state-of-the-art face recognition on a minimal computing budget running on an embedded system. Our main technical contribution centers around a novel training method, called Multibatch, for similarity learning, i.e., for the task of generating an invariant ``face signature'' through training pairs of ``same'' and ``not-same'' face images. The Multibatch method first generates signatures for a mini-batch of $k$ face images and then constructs an unbiased estimate of the full gradient by relying on all $k^2-k$ pairs from the mini-batch. We prove that the variance of the Multibatch estimator is bounded by $O(1/k^2)$, under some mild conditions. In contrast, the standard gradient estimator that relies on random $k/2$ pairs has a variance of order $1/k$. The smaller variance of the Multibatch estimator significantly speeds up the convergence rate of stochastic gradient descent.
Using the Multibatch method we train a deep convolutional neural network that achieves an accuracy of $98.2\\%$ on the LFW benchmark, while its prediction runtime takes only $30$msec on a single ARM Cortex A9 core. Furthermore, the entire training process took only 12 hours on a single Titan X GPU.", "bibtex": "@inproceedings{NIPS2016_d840cc5d,\n author = {Tadmor, Oren and Rosenwein, Tal and Shalev-Shwartz, Shai and Wexler, Yonatan and Shashua, Amnon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning a Metric Embedding for Face Recognition using the Multibatch Method},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d840cc5d906c3e9c84374c8919d2074e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d840cc5d906c3e9c84374c8919d2074e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d840cc5d906c3e9c84374c8919d2074e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d840cc5d906c3e9c84374c8919d2074e-Reviews.html", "metareview": "", "pdf_size": 221537, "gs_citation": 67, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14185572408448395238&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d840cc5d906c3e9c84374c8919d2074e-Abstract.html" }, { "title": "Learning a Probabilistic Latent Space of Object Shapes via 3D Generative-Adversarial Modeling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7371", "id": "7371", "author_site": "Jiajun Wu, Chengkai Zhang, Tianfan Xue, Bill Freeman, Josh Tenenbaum", "author": "Jiajun Wu; Chengkai Zhang; Tianfan Xue; Bill Freeman; Josh Tenenbaum", "abstract": "We study the problem of 3D object generation. We propose a novel framework, namely 3D Generative Adversarial Network (3D-GAN), which generates 3D objects from a probabilistic space by leveraging recent advances in volumetric convolutional networks and generative adversarial nets. The benefits of our model are three-fold: first, the use of an adversarial criterion, instead of traditional heuristic criteria, enables the generator to capture object structure implicitly and to synthesize high-quality 3D objects; second, the generator establishes a mapping from a low-dimensional probabilistic space to the space of 3D objects, so that we can sample objects without a reference image or CAD models, and explore the 3D object manifold; third, the adversarial discriminator provides a powerful 3D shape descriptor which, learned without supervision, has wide applications in 3D object recognition. Experiments demonstrate that our method generates high-quality 3D objects, and our unsupervisedly learned features achieve impressive performance on 3D object recognition, comparable with those of supervised learning methods.", "bibtex": "@inproceedings{NIPS2016_44f683a8,\n author = {Wu, Jiajun and Zhang, Chengkai and Xue, Tianfan and Freeman, Bill and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning a Probabilistic Latent Space of Object Shapes via 3D Generative-Adversarial Modeling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/44f683a84163b3523afe57c2e008bc8c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/44f683a84163b3523afe57c2e008bc8c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/44f683a84163b3523afe57c2e008bc8c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/44f683a84163b3523afe57c2e008bc8c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/44f683a84163b3523afe57c2e008bc8c-Reviews.html", "metareview": "", "pdf_size": 8061941, "gs_citation": 2495, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1838944634579790374&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "MIT CSAIL; MIT CSAIL; MIT CSAIL; MIT CSAIL, Google Research; MIT CSAIL", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/44f683a84163b3523afe57c2e008bc8c-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.csail.mit.edu", "aff_unique_abbr": "MIT CSAIL", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning and Forecasting Opinion Dynamics in Social Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7023", "id": "7023", "author_site": "Abir De, Isabel Valera, Niloy Ganguly, Sourangshu Bhattacharya, Manuel Gomez Rodriguez", "author": "Abir De; Isabel Valera; Niloy Ganguly; Sourangshu Bhattacharya; Manuel Gomez Rodriguez", "abstract": "Social media and social networking sites have become a global pinboard for exposition and discussion of news, topics, and ideas, where social media users often update their opinions about a particular topic by learning from the opinions shared by their friends. In this context, can we learn a data-driven model of opinion dynamics that is able to accurately forecast users' opinions? In this paper, we introduce SLANT, a probabilistic modeling framework of opinion dynamics, which represents users' opinions over time by means of marked jump diffusion stochastic differential equations, and allows for efficient model simulation and parameter estimation from historical fine grained event data. We then leverage our framework to derive a set of efficient predictive formulas for opinion forecasting and identify conditions under which opinions converge to a steady state. Experiments on data gathered from Twitter show that our model provides a good fit to the data and our formulas achieve more accurate forecasting than alternatives.", "bibtex": "@inproceedings{NIPS2016_f340f1b1,\n author = {De, Abir and Valera, Isabel and Ganguly, Niloy and Bhattacharya, Sourangshu and Gomez Rodriguez, Manuel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning and Forecasting Opinion Dynamics in Social Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f340f1b1f65b6df5b5e3f94d95b11daf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f340f1b1f65b6df5b5e3f94d95b11daf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f340f1b1f65b6df5b5e3f94d95b11daf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f340f1b1f65b6df5b5e3f94d95b11daf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f340f1b1f65b6df5b5e3f94d95b11daf-Reviews.html", "metareview": "", "pdf_size": 1482439, "gs_citation": 137, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2910879808326770665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "IIT Kharagpur\u2217; MPI for Software Systems\u2020; IIT Kharagpur\u2217; IIT Kharagpur\u2217; MPI for Software Systems\u2020", "aff_domain": "cse.iitkgp.ernet.in;mpi-sws.org;cse.iitkgp.ernet.in;cse.iitkgp.ernet.in;mpi-sws.org", "email": "cse.iitkgp.ernet.in;mpi-sws.org;cse.iitkgp.ernet.in;cse.iitkgp.ernet.in;mpi-sws.org", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f340f1b1f65b6df5b5e3f94d95b11daf-Abstract.html", "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Max Planck Institute for Software Systems", "aff_unique_dep": ";Software Systems", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.mpi-sws.org", "aff_unique_abbr": "IIT KGP;MPI-SWS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Kharagpur;", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "India;Germany" }, { "title": "Learning brain regions via large-scale online structured sparse dictionary learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7005", "id": "7005", "author_site": "Elvis DOHMATOB, Arthur Mensch, Gael Varoquaux, Bertrand Thirion", "author": "Elvis DOHMATOB; Arthur Mensch; Gael Varoquaux; Bertrand Thirion", "abstract": "We propose a multivariate online dictionary-learning method for obtaining decompositions of brain images with structured and sparse components (aka atoms). Sparsity is to be understood in the usual sense: the dictionary atoms are constrained to contain mostly zeros. This is imposed via an $\\ell_1$-norm constraint. By \"structured\", we mean that the atoms are piece-wise smooth and compact, thus making up blobs, as opposed to scattered patterns of activation. We propose to use a Sobolev (Laplacian) penalty to impose this type of structure. Combining the two penalties, we obtain decompositions that properly delineate brain structures from functional images. This non-trivially extends the online dictionary-learning work of Mairal et al. (2010), at the price of only a factor of 2 or 3 on the overall running time. Just like the Mairal et al. (2010) reference method, the online nature of our proposed algorithm allows it to scale to arbitrarily sized datasets. 
Experiments on brain data show that our proposed method extracts structured and denoised dictionaries that are more interpretable and better capture inter-subject variability in small, medium, and large-scale regimes alike, compared to state-of-the-art models.", "bibtex": "@inproceedings{NIPS2016_130f1a8e,\n author = {DOHMATOB, Elvis and Mensch, Arthur and Varoquaux, Gael and Thirion, Bertrand},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning brain regions via large-scale online structured sparse dictionary learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/130f1a8e9e102707f3f91b010f151b0b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/130f1a8e9e102707f3f91b010f151b0b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/130f1a8e9e102707f3f91b010f151b0b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/130f1a8e9e102707f3f91b010f151b0b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/130f1a8e9e102707f3f91b010f151b0b-Reviews.html", "metareview": "", "pdf_size": 2151827, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9178760743931568834&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Parietal Team, INRIA / CEA, Neurospin, Universit\u00e9 Paris-Saclay, France; Parietal Team, INRIA / CEA, Neurospin, Universit\u00e9 Paris-Saclay, France; Parietal Team, INRIA / CEA, Neurospin, Universit\u00e9 Paris-Saclay, France; Parietal Team, INRIA / CEA, Neurospin, Universit\u00e9 Paris-Saclay, France", "aff_domain": "inria.fr;inria.fr;inria.fr;inria.fr", "email": "inria.fr;inria.fr;inria.fr;inria.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/130f1a8e9e102707f3f91b010f151b0b-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "Parietal Team", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Learning feed-forward one-shot learners", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7290", "id": "7290", "author_site": "Luca Bertinetto, Jo\u00e3o Henriques, Jack Valmadre, Philip Torr, Andrea Vedaldi", "author": "Luca Bertinetto; Jo\u00e3o F. Henriques; Jack Valmadre; Philip Torr; Andrea Vedaldi", "abstract": "One-shot learning is usually tackled by using generative models or discriminative embeddings. Discriminative methods based on deep learning, which are very effective in other learning scenarios, are ill-suited for one-shot learning as they need large amounts of training data. In this paper, we propose a method to learn the parameters of a deep model in one shot. We construct the learner as a second deep network, called a learnet, which predicts the parameters of a pupil network from a single exemplar. In this manner we obtain an efficient feed-forward one-shot learner, trained end-to-end by minimizing a one-shot classification objective in a learning to learn formulation. In order to make the construction feasible, we propose a number of factorizations of the parameters of the pupil network.
We demonstrate encouraging results by learning characters from single exemplars in Omniglot, and by tracking visual objects from a single initial exemplar in the Visual Object Tracking benchmark.", "bibtex": "@inproceedings{NIPS2016_839ab468,\n author = {Bertinetto, Luca and Henriques, Jo\\~{a}o F. and Valmadre, Jack and Torr, Philip and Vedaldi, Andrea},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning feed-forward one-shot learners},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/839ab46820b524afda05122893c2fe8e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/839ab46820b524afda05122893c2fe8e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/839ab46820b524afda05122893c2fe8e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/839ab46820b524afda05122893c2fe8e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/839ab46820b524afda05122893c2fe8e-Reviews.html", "metareview": "", "pdf_size": 706329, "gs_citation": 576, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8558583381725749208&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "University of Oxford; University of Oxford; University of Oxford; University of Oxford; University of Oxford", "aff_domain": "robots.ox.ac.uk;robots.ox.ac.uk;robots.ox.ac.uk;eng.ox.ac.uk;robots.ox.ac.uk", "email": "robots.ox.ac.uk;robots.ox.ac.uk;robots.ox.ac.uk;eng.ox.ac.uk;robots.ox.ac.uk", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/839ab46820b524afda05122893c2fe8e-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Learning from Rational Behavior: Predicting Solutions to Unknown Linear Programs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8515", "id": "8515", "author_site": "Shahin Jabbari, Ryan Rogers, Aaron Roth, Steven Wu", "author": "Shahin Jabbari; Ryan M Rogers; Aaron Roth; Steven Z. Wu", "abstract": "We define and study the problem of predicting the solution to a linear program (LP) given only partial information about its objective and constraints. This generalizes the problem of learning to predict the purchasing behavior of a rational agent who has an unknown objective function, that has been studied under the name \u201cLearning from Revealed Preferences\". We give mistake bound learning algorithms in two settings: in the first, the objective of the LP is known to the learner but there is an arbitrary, fixed set of constraints which are unknown. Each example is defined by an additional known constraint and the goal of the learner is to predict the optimal solution of the LP given the union of the known and unknown constraints. This models the problem of predicting the behavior of a rational agent whose goals are known, but whose resources are unknown. In the second setting, the objective of the LP is unknown, and changing in a controlled way. The constraints of the LP may also change every day, but are known. 
An example is given by a set of constraints and partial information about the objective, and the task of the learner is again to predict the optimal solution of the partially known LP.", "bibtex": "@inproceedings{NIPS2016_ca8155f4,\n author = {Jabbari, Shahin and Rogers, Ryan M and Roth, Aaron and Wu, Steven Z.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning from Rational Behavior: Predicting Solutions to Unknown Linear Programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ca8155f4d27f205953f9d3d7974bdd70-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ca8155f4d27f205953f9d3d7974bdd70-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ca8155f4d27f205953f9d3d7974bdd70-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ca8155f4d27f205953f9d3d7974bdd70-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ca8155f4d27f205953f9d3d7974bdd70-Reviews.html", "metareview": "", "pdf_size": 508333, "gs_citation": 15, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7637879785662422987&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University of Pennsylvania; University of Pennsylvania; University of Pennsylvania; University of Pennsylvania", "aff_domain": "cis.upenn.edu;sas.upenn.edu;cis.upenn.edu;cis.upenn.edu", "email": "cis.upenn.edu;sas.upenn.edu;cis.upenn.edu;cis.upenn.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ca8155f4d27f205953f9d3d7974bdd70-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7c8c9a742a", "title": "Learning from Small Sample Sets by Combining Unsupervised Meta-Training with CNNs", "site": "https://papers.nips.cc/paper_files/paper/2016/hash/140f6969d5213fd0ece03148e62e461e-Abstract.html", "author": "Yu-Xiong Wang; Martial Hebert", "abstract": "This work explores CNNs for the recognition of novel categories from few examples. Inspired by the transferability properties of CNNs, we introduce an additional unsupervised meta-training stage that exposes multiple top layer units to a large amount of unlabeled real-world images. By encouraging these units to learn diverse sets of low-density separators across the unlabeled data, we capture a more generic, richer description of the visual world, which decouples these units from ties to a specific set of categories. We propose an unsupervised margin maximization that jointly estimates compact high-density regions and infers low-density separators. The low-density separator (LDS) modules can be plugged into any or all of the top layers of a standard CNN architecture. The resulting CNNs significantly improve the performance in scene classification, fine-grained recognition, and action recognition with small training samples.", "bibtex": "@inproceedings{NIPS2016_140f6969,\n author = {Wang, Yu-Xiong and Hebert, Martial},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning from Small Sample Sets by Combining Unsupervised Meta-Training with CNNs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/140f6969d5213fd0ece03148e62e461e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/140f6969d5213fd0ece03148e62e461e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/140f6969d5213fd0ece03148e62e461e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/140f6969d5213fd0ece03148e62e461e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/140f6969d5213fd0ece03148e62e461e-Reviews.html", "metareview": "", "pdf_size": 4660173, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5710384619356050637&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Robotics Institute, Carnegie Mellon University; Robotics Institute, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 2, "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Robotics Institute", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning in Games: Robustness of Fast Convergence", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7387", "id": "7387", "author_site": "Dylan Foster, zhiyuan li, Thodoris Lykouris, Karthik Sridharan, Eva Tardos", "author": "Dylan J Foster; Zhiyuan Li; Thodoris Lykouris; Karthik Sridharan; Eva Tardos", "abstract": "We show that learning algorithms satisfying a low approximate regret property experience fast convergence to approximate optimality in a large class of repeated games. Our property, which simply requires that each learner has small regret compared to a (1+eps)-multiplicative approximation to the best action in hindsight, is ubiquitous among learning algorithms; it is satisfied even by the vanilla Hedge forecaster. Our results improve upon recent work of Syrgkanis et al. in a number of ways. We require only that players observe payoffs under other players' realized actions, as opposed to expected payoffs. We further show that convergence occurs with high probability, and show convergence under bandit feedback. Finally, we improve upon the speed of convergence by a factor of n, the number of players. Both the scope of settings and the class of algorithms for which our analysis provides fast convergence are considerably broader than in previous work. Our framework applies to dynamic population games via a low approximate regret property for shifting experts. Here we strengthen the results of Lykouris et al. in two ways: We allow players to select learning algorithms from a larger class, which includes a minor variant of the basic Hedge algorithm, and we increase the maximum churn in players for which approximate optimality is achieved. In the bandit setting we present a new algorithm which provides a \"small loss\"-type bound with improved dependence on the number of actions in utility settings, and is both simple and efficient. 
This result may be of independent interest.", "bibtex": "@inproceedings{NIPS2016_b3f61131,\n author = {Foster, Dylan J and Li, Zhiyuan and Lykouris, Thodoris and Sridharan, Karthik and Tardos, Eva},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning in Games: Robustness of Fast Convergence},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b3f61131b6eceeb2b14835fa648a48ff-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b3f61131b6eceeb2b14835fa648a48ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b3f61131b6eceeb2b14835fa648a48ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b3f61131b6eceeb2b14835fa648a48ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b3f61131b6eceeb2b14835fa648a48ff-Reviews.html", "metareview": "", "pdf_size": 526385, "gs_citation": 133, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2611646240577621177&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Cornell University; Tsinghua University+Cornell University; Cornell University; Cornell University; Cornell University", "aff_domain": "cs.cornell.edu;mails.tsinghua.edu.cn;cs.cornell.edu;cs.cornell.edu;cs.cornell.edu", "email": "cs.cornell.edu;mails.tsinghua.edu.cn;cs.cornell.edu;cs.cornell.edu;cs.cornell.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b3f61131b6eceeb2b14835fa648a48ff-Abstract.html", "aff_unique_index": "0;1+0;0;0;0", "aff_unique_norm": "Cornell University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Cornell;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Learning shape correspondence with anisotropic convolutional neural networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7025", "id": "7025", "author_site": "Davide Boscaini, Jonathan Masci, Emanuele Rodol\u00e0, Michael Bronstein", "author": "Davide Boscaini; Jonathan Masci; Emanuele Rodol\u00e0; Michael Bronstein", "abstract": "Convolutional neural networks have achieved extraordinary results in many computer vision and pattern recognition applications; however, their adoption in the computer graphics and geometry processing communities is limited due to the non-Euclidean structure of their data. In this paper, we propose Anisotropic Convolutional Neural Network (ACNN), a generalization of classical CNNs to non-Euclidean domains, where classical convolutions are replaced by projections over a set of oriented anisotropic diffusion kernels. We use ACNNs to effectively learn intrinsic dense correspondences between deformable shapes, a fundamental problem in geometry processing, arising in a wide variety of applications. 
We tested ACNN's performance in very challenging settings, achieving state-of-the-art results on some of the most difficult recent correspondence benchmarks.", "bibtex": "@inproceedings{NIPS2016_228499b5,\n author = {Boscaini, Davide and Masci, Jonathan and Rodol\\`{a}, Emanuele and Bronstein, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning shape correspondence with anisotropic convolutional neural networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/228499b55310264a8ea0e27b6e7c6ab6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/228499b55310264a8ea0e27b6e7c6ab6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/228499b55310264a8ea0e27b6e7c6ab6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/228499b55310264a8ea0e27b6e7c6ab6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/228499b55310264a8ea0e27b6e7c6ab6-Reviews.html", "metareview": "", "pdf_size": 1699884, "gs_citation": 638, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8736265664125680677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "USI Lugano, Switzerland; USI Lugano, Switzerland; USI Lugano, Switzerland; USI Lugano, Switzerland+Tel Aviv University, Israel+Intel, Israel", "aff_domain": "usi.ch;usi.ch;usi.ch;usi.ch", "email": "usi.ch;usi.ch;usi.ch;usi.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/228499b55310264a8ea0e27b6e7c6ab6-Abstract.html", "aff_unique_index": "0;0;0;0+1+2", "aff_unique_norm": "Universit\u00e0 della Svizzera italiana;Tel Aviv University;Intel", "aff_unique_dep": ";;Intel Corporation", "aff_unique_url": "https://www.usi.ch;https://www.tau.ac.il;https://www.intel.com", "aff_unique_abbr": "USI;TAU;Intel", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lugano;", "aff_country_unique_index": "0;0;0;0+1+1", "aff_country_unique": "Switzerland;Israel" }, { "title": "Learning the Number of Neurons in Deep Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6970", "id": "6970", "author_site": "Jose M. Alvarez, Mathieu Salzmann", "author": "Jose M Alvarez; Mathieu Salzmann", "abstract": "Nowadays, the number of layers and of neurons in each layer of a deep network are typically set manually. While very deep and wide networks have proven effective in general, they come at a high memory and computation cost, thus making them impractical for constrained platforms. These networks, however, are known to have many redundant parameters, and could thus, in principle, be replaced by more compact architectures. In this paper, we introduce an approach to automatically determining the number of neurons in each layer of a deep network during learning. To this end, we propose to make use of a group sparsity regularizer on the parameters of the network, where each group is defined to act on a single neuron.
Starting from an overcomplete network, we show that our approach can reduce the number of parameters by up to 80\\% while retaining or even improving the network accuracy.", "bibtex": "@inproceedings{NIPS2016_6e7d2da6,\n author = {Alvarez, Jose M and Salzmann, Mathieu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning the Number of Neurons in Deep Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6e7d2da6d3953058db75714ac400b584-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6e7d2da6d3953058db75714ac400b584-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6e7d2da6d3953058db75714ac400b584-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6e7d2da6d3953058db75714ac400b584-Reviews.html", "metareview": "", "pdf_size": 339978, "gs_citation": 534, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7560041967181663482&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Data61 @ CSIRO, Canberra, ACT 2601, Australia; CVLab, EPFL, CH-1015 Lausanne, Switzerland", "aff_domain": "data61.csiro.au;epfl.ch", "email": "data61.csiro.au;epfl.ch", "github": "", "project": "http://www.josemalvarez.net", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6e7d2da6d3953058db75714ac400b584-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "CSIRO;EPFL", "aff_unique_dep": "Data61;CVLab", "aff_unique_url": "https://www.csiro.au;https://www.epfl.ch", "aff_unique_abbr": "CSIRO;EPFL", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Canberra;Lausanne", "aff_country_unique_index": "0;1", "aff_country_unique": "Australia;Switzerland" }, { "title": "Learning to Communicate with Deep Multi-Agent Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6897", "id": "6897", "author_site": "Jakob Foerster, Yannis Assael, Nando de Freitas, Shimon Whiteson", "author": "Jakob Foerster; Ioannis Alexandros Assael; Nando de Freitas; Shimon Whiteson", "abstract": "We consider the problem of multiple agents sensing and acting in environments with the goal of maximising their shared utility. In these environments, agents must learn communication protocols in order to share information that is needed to solve the tasks. By embracing deep neural networks, we are able to demonstrate end-to-end learning of protocols in complex environments inspired by communication riddles and multi-agent computer vision problems with partial observability. We propose two approaches for learning in these domains: Reinforced Inter-Agent Learning (RIAL) and Differentiable Inter-Agent Learning (DIAL). The former uses deep Q-learning, while the latter exploits the fact that, during learning, agents can backpropagate error derivatives through (noisy) communication channels. Hence, this approach uses centralised learning but decentralised execution. 
Our experiments introduce new environments for studying the learning of communication protocols and present a set of engineering innovations that are essential for success in these domains.", "bibtex": "@inproceedings{NIPS2016_c7635bfd,\n author = {Foerster, Jakob and Assael, Ioannis Alexandros and de Freitas, Nando and Whiteson, Shimon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Communicate with Deep Multi-Agent Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c7635bfd99248a2cdef8249ef7bfbef4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c7635bfd99248a2cdef8249ef7bfbef4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c7635bfd99248a2cdef8249ef7bfbef4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c7635bfd99248a2cdef8249ef7bfbef4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c7635bfd99248a2cdef8249ef7bfbef4-Reviews.html", "metareview": "", "pdf_size": 1077870, "gs_citation": 2331, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14427321615765348461&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "University of Oxford, United Kingdom; University of Oxford, United Kingdom; University of Oxford, United Kingdom + Canadian Institute for Advanced Research, CIFAR NCAP Program + Google DeepMind; University of Oxford, United Kingdom", "aff_domain": "cs.ox.ac.uk;cs.ox.ac.uk;google.com;cs.ox.ac.uk", "email": "cs.ox.ac.uk;cs.ox.ac.uk;google.com;cs.ox.ac.uk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c7635bfd99248a2cdef8249ef7bfbef4-Abstract.html", "aff_unique_index": "0;0;0+1+2;0", "aff_unique_norm": "University of Oxford;Canadian Institute for Advanced Research;Google", "aff_unique_dep": ";CIFAR NCAP Program;Google DeepMind", "aff_unique_url": "https://www.ox.ac.uk;https://www.cifar.ca;https://deepmind.com", "aff_unique_abbr": "Oxford;CIFAR;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0+1+0;0", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Learning to Poke by Poking: Experiential Learning of Intuitive Physics", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7411", "id": "7411", "author_site": "Pulkit Agrawal, Ashvin Nair, Pieter Abbeel, Jitendra Malik, Sergey Levine", "author": "Pulkit Agrawal; Ashvin V Nair; Pieter Abbeel; Jitendra Malik; Sergey Levine", "abstract": "We investigate an experiential learning paradigm for acquiring an internal model of intuitive physics. Our model is evaluated on a real-world robotic manipulation task that requires displacing objects to target locations by poking. The robot gathered over 400 hours of experience by executing more than 50K pokes on different objects. We propose a novel approach based on deep neural networks for modeling the dynamics of robot's interactions directly from images, by jointly estimating forward and inverse models of dynamics. The inverse model objective provides supervision to construct informative visual features, which the forward model can then predict and in turn regularize the feature space for the inverse model. 
The interplay between these two objectives creates useful, accurate models that can then be used for multi-step decision making. This formulation has the additional benefit that it is possible to learn forward models in an abstract feature space and thus alleviate the need to predict pixels. Our experiments show that this joint modeling approach outperforms alternative methods. We also demonstrate that active data collection using the learned model further improves performance.", "bibtex": "@inproceedings{NIPS2016_c203d8a1,\n author = {Agrawal, Pulkit and Nair, Ashvin V and Abbeel, Pieter and Malik, Jitendra and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to Poke by Poking: Experiential Learning of Intuitive Physics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c203d8a151612acf12457e4d67635a95-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c203d8a151612acf12457e4d67635a95-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c203d8a151612acf12457e4d67635a95-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c203d8a151612acf12457e4d67635a95-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c203d8a151612acf12457e4d67635a95-Reviews.html", "metareview": "", "pdf_size": 8870441, "gs_citation": 641, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4353677612815002221&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Berkeley Artificial Intelligence Research Laboratory (BAIR); Berkeley Artificial Intelligence Research Laboratory (BAIR); Berkeley Artificial Intelligence Research Laboratory (BAIR); Berkeley Artificial Intelligence Research Laboratory (BAIR); Berkeley Artificial Intelligence Research Laboratory (BAIR)", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "email": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c203d8a151612acf12457e4d67635a95-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Artificial Intelligence Research Laboratory", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Learning to learn by gradient descent by gradient descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7042", "id": "7042", "author_site": "Marcin Andrychowicz, Misha Denil, Sergio G\u00f3mez, Matthew Hoffman, David Pfau, Tom Schaul, Nando de Freitas", "author": "Marcin Andrychowicz; Misha Denil; Sergio G\u00f3mez; Matthew W Hoffman; David Pfau; Tom Schaul; Brendan Shillingford; Nando de Freitas", "abstract": "The move from hand-designed features to learned features in machine learning has been wildly successful. In spite of this, optimization algorithms are still designed by hand.
In this paper we show how the design of an optimization algorithm can be cast as a learning problem, allowing the algorithm to learn to exploit structure in the problems of interest in an automatic way. Our learned algorithms, implemented by LSTMs, outperform generic, hand-designed competitors on the tasks for which they are trained, and also generalize well to new tasks with similar structure. We demonstrate this on a number of tasks, including simple convex problems, training neural networks, and styling images with neural art.", "bibtex": "@inproceedings{NIPS2016_fb875828,\n author = {Andrychowicz, Marcin and Denil, Misha and G\\'{o}mez, Sergio and Hoffman, Matthew W and Pfau, David and Schaul, Tom and Shillingford, Brendan and de Freitas, Nando},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning to learn by gradient descent by gradient descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fb87582825f9d28a8d42c5e5e5e8b23d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fb87582825f9d28a8d42c5e5e5e8b23d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fb87582825f9d28a8d42c5e5e5e8b23d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fb87582825f9d28a8d42c5e5e5e8b23d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fb87582825f9d28a8d42c5e5e5e8b23d-Reviews.html", "metareview": "", "pdf_size": 1163569, "gs_citation": 2620, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17211876730630533152&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind+University of Oxford; Google DeepMind+University of Oxford+Canadian Institute for Advanced Research", "aff_domain": "gmail.com;google.com;google.com;google.com;google.com;google.com;cs.ox.ac.uk;google.com", "email": "gmail.com;google.com;google.com;google.com;google.com;google.com;cs.ox.ac.uk;google.com", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fb87582825f9d28a8d42c5e5e5e8b23d-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0+1;0+1+2", "aff_unique_norm": "Google;University of Oxford;Canadian Institute for Advanced Research", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.ox.ac.uk;https://www.cifar.ca", "aff_unique_abbr": "DeepMind;Oxford;CIFAR", "aff_campus_unique_index": ";", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0+0;0+0+1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "Learning under uncertainty: a comparison between R-W and Bayesian approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7330", "id": "7330", "author_site": "He Huang, Martin Paulus", "author": "He Huang; Martin Paulus", "abstract": "Accurately differentiating between what are truly unpredictably random and systematic changes that occur at random can have a profound effect on affect and cognition. To examine the underlying computational principles that guide different learning behavior in an uncertain environment, we compared an R-W model and a Bayesian approach in a visual search task with different volatility levels.
Both the R-W model and the Bayesian approach reflected an individual's estimation of the environmental volatility, and there is a strong correlation between the learning rate in the R-W model and the belief of stationarity in the Bayesian approach in different volatility conditions. In a low volatility condition, the R-W model indicates that the learning rate positively correlates with lose-shift rate, but not choice optimality (inverted U shape). The Bayesian approach indicates that the belief of environmental stationarity positively correlates with choice optimality, but not lose-shift rate (inverted U shape). In addition, we showed that compared to Expert learners, individuals with a high lose-shift rate (sub-optimal learners) had a significantly higher learning rate estimated from the R-W model and a lower belief of stationarity from the Bayesian model.", "bibtex": "@inproceedings{NIPS2016_14d9e800,\n author = {Huang, He and Paulus, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning under uncertainty: a comparison between R-W and Bayesian approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/14d9e8007c9b41f57891c48e07c23f57-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/14d9e8007c9b41f57891c48e07c23f57-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/14d9e8007c9b41f57891c48e07c23f57-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/14d9e8007c9b41f57891c48e07c23f57-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/14d9e8007c9b41f57891c48e07c23f57-Reviews.html", "metareview": "", "pdf_size": 677392, "gs_citation": 2, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1385828035409957361&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Laureate Institute for Brain Research; Laureate Institute for Brain Research", "aff_domain": "gmail.com;laureateinstitute.org", "email": "gmail.com;laureateinstitute.org", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/14d9e8007c9b41f57891c48e07c23f57-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Laureate Institute for Brain Research", "aff_unique_dep": "", "aff_unique_url": "https://www.laureateinstitute.org", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Learning values across many orders of magnitude", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7311", "id": "7311", "author_site": "Hado van Hasselt, Arthur Guez, Arthur Guez, Matteo Hessel, Volodymyr Mnih, David Silver", "author": "Hado P van Hasselt; Arthur Guez; Arthur Guez; Matteo Hessel; Volodymyr Mnih; David Silver", "abstract": "Most learning algorithms are not invariant to the scale of the signal that is being approximated. We propose to adaptively normalize the targets used in the learning updates. This is important in value-based reinforcement learning, where the magnitude of appropriate value approximations can change over time when we update the policy of behavior. Our main motivation is prior work on learning to play Atari games, where the rewards were clipped to a predetermined range.
This clipping facilitates learning across many different games with a single learning algorithm, but a clipped reward function can result in qualitatively different behavior. Using adaptive normalization we can remove this domain-specific heuristic without diminishing overall performance.", "bibtex": "@inproceedings{NIPS2016_5227b6aa,\n author = {van Hasselt, Hado P and Guez, Arthur and Guez, Arthur and Hessel, Matteo and Mnih, Volodymyr and Silver, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Learning values across many orders of magnitude},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5227b6aaf294f5f027273aebf16015f2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5227b6aaf294f5f027273aebf16015f2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5227b6aaf294f5f027273aebf16015f2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5227b6aaf294f5f027273aebf16015f2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5227b6aaf294f5f027273aebf16015f2-Reviews.html", "metareview": "", "pdf_size": 905195, "gs_citation": 218, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1890311093801365339&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html" }, { "title": "Leveraging Sparsity for Efficient Submodular Data Summarization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7253", "id": "7253", "author_site": "Erik Lindgren, Shanshan Wu, Alex Dimakis", "author": "Erik Lindgren; Shanshan Wu; Alexandros G Dimakis", "abstract": "The facility location problem is widely used for summarizing large datasets and has additional applications in sensor placement, image retrieval, and clustering. One difficulty of this problem is that submodular optimization algorithms require the calculation of pairwise benefits for all items in the dataset. This is infeasible for large problems, so recent work proposed to only calculate nearest neighbor benefits. One limitation is that several strong assumptions were invoked to obtain provable approximation guarantees. In this paper we establish that these extra assumptions are not necessary\u2014solving the sparsified problem will be almost optimal under the standard assumptions of the problem. We then analyze a different method of sparsification that is a better model for methods such as Locality Sensitive Hashing to accelerate the nearest neighbor computations and extend the use of the problem to a broader family of similarities. We validate our approach by demonstrating that it rapidly generates interpretable summaries.", "bibtex": "@inproceedings{NIPS2016_d43ab110,\n author = {Lindgren, Erik and Wu, Shanshan and Dimakis, Alexandros G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Leveraging Sparsity for Efficient Submodular Data Summarization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d43ab110ab2489d6b9b2caa394bf920f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d43ab110ab2489d6b9b2caa394bf920f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d43ab110ab2489d6b9b2caa394bf920f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d43ab110ab2489d6b9b2caa394bf920f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d43ab110ab2489d6b9b2caa394bf920f-Reviews.html", "metareview": "", "pdf_size": 1427744, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8850848481702917303&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The University of Texas at Austin, Department of Electrical and Computer Engineering; The University of Texas at Austin, Department of Electrical and Computer Engineering; The University of Texas at Austin, Department of Electrical and Computer Engineering", "aff_domain": "utexas.edu;utexas.edu;austin.utexas.edu", "email": "utexas.edu;utexas.edu;austin.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d43ab110ab2489d6b9b2caa394bf920f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Lifelong Learning with Weighted Majority Votes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7092", "id": "7092", "author_site": "Anastasia Pentina, Ruth Urner", "author": "Anastasia Pentina; Ruth Urner", "abstract": "Better understanding of the potential benefits of information transfer and representation learning is an important step towards the goal of building intelligent systems that are able to persist in the world and learn over time. In this work, we consider a setting where the learner encounters a stream of tasks but is able to retain only limited information from each encountered task, such as a learned predictor. In contrast to most previous works analyzing this scenario, we do not make any distributional assumptions on the task generating process. Instead, we formulate a complexity measure that captures the diversity of the observed tasks. We provide a lifelong learning algorithm with error guarantees for every observed task (rather than on average). We show sample complexity reductions in comparison to solving every task in isolation in terms of our task complexity measure. Further, our algorithmic framework can naturally be viewed as learning a representation from encountered tasks with a neural network.", "bibtex": "@inproceedings{NIPS2016_f39ae9ff,\n author = {Pentina, Anastasia and Urner, Ruth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Lifelong Learning with Weighted Majority Votes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f39ae9ff3a81f499230c4126e01f421b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f39ae9ff3a81f499230c4126e01f421b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f39ae9ff3a81f499230c4126e01f421b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f39ae9ff3a81f499230c4126e01f421b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f39ae9ff3a81f499230c4126e01f421b-Reviews.html", "metareview": "", "pdf_size": 266767, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=323299473106966553&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "IST Austria; Max Planck Institute for Intelligent Systems", "aff_domain": "ist.ac.at;tuebingen.mpg.de", "email": "ist.ac.at;tuebingen.mpg.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f39ae9ff3a81f499230c4126e01f421b-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Institute of Science and Technology Austria;Max Planck Institute for Intelligent Systems", "aff_unique_dep": ";Intelligent Systems", "aff_unique_url": "https://www.ist.ac.at;https://www.mpi-is.mpg.de", "aff_unique_abbr": "IST Austria;MPI-IS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Austria;Germany" }, { "title": "LightRNN: Memory and Computation-Efficient Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7099", "id": "7099", "author_site": "Xiang Li, Tao Qin, Jian Yang, Xiaolin Hu, Tie-Yan Liu", "author": "Xiang Li; Tao Qin; Jian Yang; Tie-Yan Liu", "abstract": "Recurrent neural networks (RNNs) have achieved state-of-the-art performance in many natural language processing tasks, such as language modeling and machine translation. However, when the vocabulary is large, the RNN model will become very big (e.g., possibly beyond the memory capacity of a GPU device) and its training will become very inefficient. In this work, we propose a novel technique to tackle this challenge. The key idea is to use 2-Component (2C) shared embedding for word representations. We allocate every word in the vocabulary into a table, each row of which is associated with a vector, and each column associated with another vector. Depending on its position in the table, a word is jointly represented by two components: a row vector and a column vector. Since the words in the same row share the row vector and the words in the same column share the column vector, we only need $2 \\sqrt{|V|}$ vectors to represent a vocabulary of $|V|$ unique words, which are far fewer than the $|V|$ vectors required by existing approaches. Based on the 2-Component shared embedding, we design a new RNN algorithm and evaluate it using the language modeling task on several benchmark datasets. The results show that our algorithm significantly reduces the model size and speeds up the training process, without sacrificing accuracy (it achieves similar, if not better, perplexity as compared to state-of-the-art language models). 
Remarkably, on the One-Billion-Word benchmark dataset, our algorithm achieves comparable perplexity to previous language models, whilst reducing the model size by a factor of 40-100, and speeding up the training process by a factor of 2. We name our proposed algorithm \\emph{LightRNN} to reflect its very small model size and very high training speed.", "bibtex": "@inproceedings{NIPS2016_c3e4035a,\n author = {Li, Xiang and Qin, Tao and Yang, Jian and Liu, Tie-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {LightRNN: Memory and Computation-Efficient Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c3e4035af2a1cde9f21e1ae1951ac80b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c3e4035af2a1cde9f21e1ae1951ac80b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c3e4035af2a1cde9f21e1ae1951ac80b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c3e4035af2a1cde9f21e1ae1951ac80b-Reviews.html", "metareview": "", "pdf_size": 520214, "gs_citation": 73, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7191833239681372929&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Nanjing University of Science and Technology; Microsoft Research Asia; Nanjing University of Science and Technology; Microsoft Research Asia", "aff_domain": "gmail.com;microsoft.com;njust.edu.cn;microsoft.com", "email": "gmail.com;microsoft.com;njust.edu.cn;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c3e4035af2a1cde9f21e1ae1951ac80b-Abstract.html", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Nanjing University of Science and Technology;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.nust.edu.cn/;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "NUST;MSR Asia", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "title": "Linear Contextual Bandits with Knapsacks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7392", "id": "7392", "author_site": "Shipra Agrawal, Nikhil Devanur", "author": "Shipra Agrawal; Nikhil Devanur", "abstract": "We consider the linear contextual bandit problem with resource consumption, in addition to reward generation. In each round, the outcome of pulling an arm is a reward as well as a vector of resource consumptions. The expected values of these outcomes depend linearly on the context of that arm. The budget/capacity constraints require that the sum of these vectors does not exceed the budget in each dimension. The objective is once again to maximize the total reward. This problem turns out to be a common generalization of classic linear contextual bandits (linContextual), bandits with knapsacks (BwK), and the online stochastic packing problem (OSPP). We present algorithms with near-optimal regret bounds for this problem. Our bounds compare favorably to results on the unstructured version of the problem, where the relation between the contexts and the outcomes could be arbitrary, but the algorithm only competes against a fixed set of policies accessible through an optimization oracle. 
We combine techniques from the work on linContextual, BwK and OSPP in a nontrivial manner while also tackling new difficulties that are not present in any of these special cases.", "bibtex": "@inproceedings{NIPS2016_f3144cef,\n author = {Agrawal, Shipra and Devanur, Nikhil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Linear Contextual Bandits with Knapsacks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f3144cefe89a60d6a1afaf7859c5076b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f3144cefe89a60d6a1afaf7859c5076b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f3144cefe89a60d6a1afaf7859c5076b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f3144cefe89a60d6a1afaf7859c5076b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f3144cefe89a60d6a1afaf7859c5076b-Reviews.html", "metareview": "", "pdf_size": 147043, "gs_citation": 196, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12019787774852043810&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Columbia University; Microsoft Research", "aff_domain": "columbia.edu;microsoft.com", "email": "columbia.edu;microsoft.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f3144cefe89a60d6a1afaf7859c5076b-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Columbia University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Columbia;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Linear Feature Encoding for Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7132", "id": "7132", "author_site": "Zhao Song, Ronald Parr, Xuejun Liao, Lawrence Carin", "author": "Zhao Song; Ronald E Parr; Xuejun Liao; Lawrence Carin", "abstract": "Feature construction is of vital importance in reinforcement learning, as the quality of a value function or policy is largely determined by the corresponding features. The recent successes of deep reinforcement learning (RL) only increase the importance of understanding feature construction. Typical deep RL approaches use a linear output layer, which means that deep RL can be interpreted as a feature construction/encoding network followed by linear value function approximation. This paper develops and evaluates a theory of linear feature encoding. We extend theoretical results on feature quality for linear value function approximation from the uncontrolled case to the controlled case. We then develop a supervised linear feature encoding method that is motivated by insights from linear value function approximation theory, as well as empirical successes from deep RL. The resulting encoder is a surprisingly effective method for linear value function approximation using raw images as inputs.", "bibtex": "@inproceedings{NIPS2016_8232e119,\n author = {Song, Zhao and Parr, Ronald E and Liao, Xuejun and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Linear Feature Encoding for Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8232e119d8f59aa83050a741631803a6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8232e119d8f59aa83050a741631803a6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8232e119d8f59aa83050a741631803a6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8232e119d8f59aa83050a741631803a6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8232e119d8f59aa83050a741631803a6-Reviews.html", "metareview": "", "pdf_size": 296962, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18351227936220963481&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8232e119d8f59aa83050a741631803a6-Abstract.html" }, { "title": "Linear Relaxations for Finding Diverse Elements in Metric Spaces", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8508", "id": "8508", "author_site": "Aditya Bhaskara, Mehrdad Ghadiri, Vahab Mirrokni, Ola Svensson", "author": "Aditya Bhaskara; Mehrdad Ghadiri; Vahab Mirrokni; Ola Svensson", "abstract": "Choosing a diverse subset of a large collection of points in a metric space is a fundamental problem, with applications in feature selection, recommender systems, web search, data summarization, etc. Various notions of diversity have been proposed, tailored to different applications. The general algorithmic goal is to find a subset of points that maximize diversity, while obeying a cardinality (or more generally, matroid) constraint. The goal of this paper is to develop a novel linear programming (LP) framework that allows us to design approximation algorithms for such problems. We study an objective known as {\\em sum-min} diversity, which is known to be effective in many applications, and give the first constant factor approximation algorithm. Our LP framework allows us to easily incorporate additional constraints, as well as secondary objectives. We also prove a hardness result for two natural diversity objectives, under the so-called {\\em planted clique} assumption. Finally, we study the empirical performance of our algorithm on several standard datasets. We first study the approximation quality of the algorithm by comparing with the LP objective. Then, we compare the quality of the solutions produced by our method with other popular diversity maximization algorithms.", "bibtex": "@inproceedings{NIPS2016_d79c6256,\n author = {Bhaskara, Aditya and Ghadiri, Mehrdad and Mirrokni, Vahab and Svensson, Ola},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Linear Relaxations for Finding Diverse Elements in Metric Spaces},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d79c6256b9bdac53a55801a066b70da3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d79c6256b9bdac53a55801a066b70da3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d79c6256b9bdac53a55801a066b70da3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d79c6256b9bdac53a55801a066b70da3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d79c6256b9bdac53a55801a066b70da3-Reviews.html", "metareview": "", "pdf_size": 4602046, "gs_citation": 27, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10184871639293877621&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "University of Utah; Sharif University of Technology; Google Research; EPFL", "aff_domain": "cs.utah.edu;ce.sharif.edu;google.com;epfl.ch", "email": "cs.utah.edu;ce.sharif.edu;google.com;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d79c6256b9bdac53a55801a066b70da3-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Utah;Sharif University of Technology;Google;EPFL", "aff_unique_dep": ";;Google Research;", "aff_unique_url": "https://www.utah.edu;https://www.sharif.edu;https://research.google;https://www.epfl.ch", "aff_unique_abbr": "Utah;SUT;Google Research;EPFL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "United States;Iran;Switzerland" }, { "title": "Linear dynamical neural population models through nonlinear embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7337", "id": "7337", "author_site": "Yuanjun Gao, Evan Archer, Liam Paninski, John Cunningham", "author": "Yuanjun Gao; Evan W Archer; Liam Paninski; John P. Cunningham", "abstract": "A body of recent work in modeling neural activity focuses on recovering low-dimensional latent features that capture the statistical structure of large-scale neural populations. Most such approaches have focused on linear generative models, where inference is computationally tractable. Here, we propose fLDS, a general class of nonlinear generative models that permits the firing rate of each neuron to vary as an arbitrary smooth function of a latent, linear dynamical state. This extra flexibility allows the model to capture a richer set of neural variability than a purely linear model, but retains an easily visualizable low-dimensional latent space. To fit this class of non-conjugate models we propose a variational inference scheme, along with a novel approximate posterior capable of capturing rich temporal correlations across time. We show that our techniques permit inference in a wide class of generative models. We also show in application to two neural datasets that, compared to state-of-the-art neural population models, fLDS captures a much larger proportion of neural variability with a small number of latent dimensions, providing superior predictive performance and interpretability.", "bibtex": "@inproceedings{NIPS2016_76dc611d,\n author = {Gao, Yuanjun and Archer, Evan W and Paninski, Liam and Cunningham, John P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Linear dynamical neural population models through nonlinear embeddings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/76dc611d6ebaafc66cc0879c71b5db5c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/76dc611d6ebaafc66cc0879c71b5db5c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/76dc611d6ebaafc66cc0879c71b5db5c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/76dc611d6ebaafc66cc0879c71b5db5c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/76dc611d6ebaafc66cc0879c71b5db5c-Reviews.html", "metareview": "", "pdf_size": 1842362, "gs_citation": 205, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9932147788487137081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Statistics+Grossman Center; Department of Statistics+Grossman Center; Department of Statistics+Grossman Center; Department of Statistics+Grossman Center", "aff_domain": "columbia.edu;stat.columbia.edu;stat.columbia.edu;columbia.edu", "email": "columbia.edu;stat.columbia.edu;stat.columbia.edu;columbia.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/76dc611d6ebaafc66cc0879c71b5db5c-Abstract.html", "aff_unique_index": "0+1;0+1;0+1;0+1", "aff_unique_norm": "University Affiliation Not Specified;Grossman Center", "aff_unique_dep": "Department of Statistics;", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": ";;;", "aff_campus_unique": "", "aff_country_unique_index": ";;;", "aff_country_unique": "" }, { "title": "Linear-Memory and Decomposition-Invariant Linearly Convergent Conditional Gradient Algorithm for Structured Polytopes", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7413", "id": "7413", "author_site": "Dan Garber, Dan Garber, Ofer Meshi", "author": "Dan Garber; Dan Garber; Ofer Meshi", "abstract": "Recently, several works have shown that natural modifications of the classical conditional gradient method (aka Frank-Wolfe algorithm) for constrained convex optimization provably converge with a linear rate when the feasible set is a polytope, and the objective is smooth and strongly-convex. However, all of these results suffer from two significant shortcomings: i) a large memory requirement due to the need to store an explicit convex decomposition of the current iterate, and, as a consequence, a large running-time overhead per iteration; ii) the worst-case convergence rate depends unfavorably on the dimension. In this work we present a new conditional gradient variant and a corresponding analysis that improves on both of the above shortcomings. In particular, both memory and computation overheads are only linear in the dimension, and in addition, in case the optimal solution is sparse, the new convergence rate replaces a factor which is at least linear in the dimension in previous works, with a linear dependence on the number of non-zeros in the optimal solution. At the heart of our method, and corresponding analysis, is a novel way to compute decomposition-invariant away-steps. 
While our theoretical guarantees do not apply to arbitrary polytopes, they apply to several important structured polytopes that capture central concepts such as paths in graphs, perfect matchings in bipartite graphs, marginal distributions that arise in structured prediction tasks, and more. Our theoretical findings are complemented by empirical evidence showing that our method delivers state-of-the-art performance.", "bibtex": "@inproceedings{NIPS2016_daca4121,\n author = {Garber, Dan and Garber, Dan and Meshi, Ofer},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Linear-Memory and Decomposition-Invariant Linearly Convergent Conditional Gradient Algorithm for Structured Polytopes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/daca41214b39c5dc66674d09081940f0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/daca41214b39c5dc66674d09081940f0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/daca41214b39c5dc66674d09081940f0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/daca41214b39c5dc66674d09081940f0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/daca41214b39c5dc66674d09081940f0-Reviews.html", "metareview": "", "pdf_size": 842952, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17311569198709302334&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/daca41214b39c5dc66674d09081940f0-Abstract.html" }, { "title": "Local Maxima in the Likelihood of Gaussian Mixture Models: Structural Results and Algorithmic Consequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7138", "id": "7138", "author_site": "Chi Jin, Yuchen Zhang, Sivaraman Balakrishnan, Martin J Wainwright, Michael Jordan", "author": "Chi Jin; Yuchen Zhang; Sivaraman Balakrishnan; Martin J. Wainwright; Michael I Jordan", "abstract": "We provide two fundamental results on the population (infinite-sample) likelihood function of Gaussian mixture models with $M \\geq 3$ components. Our first main result shows that the population likelihood function has bad local maxima even in the special case of equally-weighted mixtures of well-separated and spherical Gaussians. We prove that the log-likelihood value of these bad local maxima can be arbitrarily worse than that of any global optimum, thereby resolving an open question of Srebro (2007). Our second main result shows that the EM algorithm (or a first-order variant of it) with random initialization will converge to bad critical points with probability at least $1-e^{-\\Omega(M)}$. We further establish that a first-order variant of EM will not converge to strict saddle points almost surely, indicating that the poor performance of the first-order method can be attributed to the existence of bad local maxima rather than bad saddle points. 
Overall, our results highlight the necessity of careful initialization when using the EM algorithm in practice, even when applied in highly favorable settings.", "bibtex": "@inproceedings{NIPS2016_3875115b,\n author = {Jin, Chi and Zhang, Yuchen and Balakrishnan, Sivaraman and Wainwright, Martin J and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Local Maxima in the Likelihood of Gaussian Mixture Models: Structural Results and Algorithmic Consequences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3875115bacc48cca24ac51ee4b0e7975-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3875115bacc48cca24ac51ee4b0e7975-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3875115bacc48cca24ac51ee4b0e7975-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3875115bacc48cca24ac51ee4b0e7975-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3875115bacc48cca24ac51ee4b0e7975-Reviews.html", "metareview": "", "pdf_size": 398969, "gs_citation": 198, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13243100138993478801&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "UC Berkeley; UC Berkeley; Carnegie Mellon University; UC Berkeley; UC Berkeley", "aff_domain": "cs.berkeley.edu;berkeley.edu;stat.cmu.edu;berkeley.edu;cs.berkeley.edu", "email": "cs.berkeley.edu;berkeley.edu;stat.cmu.edu;berkeley.edu;cs.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3875115bacc48cca24ac51ee4b0e7975-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.cmu.edu", "aff_unique_abbr": "UC Berkeley;CMU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Local Minimax Complexity of Stochastic Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7249", "id": "7249", "author_site": "sabyasachi chatterjee, John Duchi, John Lafferty, Yuancheng Zhu", "author": "sabyasachi chatterjee; John C. Duchi; John Lafferty; Yuancheng Zhu", "abstract": "We extend the traditional worst-case, minimax analysis of stochastic convex optimization by introducing a localized form of minimax complexity for individual functions. Our main result gives function-specific lower and upper bounds on the number of stochastic subgradient evaluations needed to optimize either the function or its ``hardest local alternative'' to a given numerical precision. The bounds are expressed in terms of a localized and computational analogue of the modulus of continuity that is central to statistical minimax analysis. We show how the computational modulus of continuity can be explicitly calculated in concrete cases, and how it relates to the curvature of the function at the optimum. We also prove a superefficiency result that demonstrates that it is a meaningful benchmark, acting as a computational analogue of the Fisher information in statistical estimation. 
The nature and practical implications of the results are demonstrated in simulations.", "bibtex": "@inproceedings{NIPS2016_b9f94c77,\n author = {chatterjee, sabyasachi and Duchi, John C and Lafferty, John and Zhu, Yuancheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Local Minimax Complexity of Stochastic Convex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b9f94c77652c9a76fc8a442748cd54bd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b9f94c77652c9a76fc8a442748cd54bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b9f94c77652c9a76fc8a442748cd54bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b9f94c77652c9a76fc8a442748cd54bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b9f94c77652c9a76fc8a442748cd54bd-Reviews.html", "metareview": "", "pdf_size": 359324, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8816080847500939392&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b9f94c77652c9a76fc8a442748cd54bd-Abstract.html" }, { "title": "Local Similarity-Aware Deep Feature Embedding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7399", "id": "7399", "author_site": "Chen Huang, Chen Change Loy, Xiaoou Tang", "author": "Chen Huang; Chen Change Loy; Xiaoou Tang", "abstract": "Existing deep embedding methods in vision tasks are capable of learning a compact Euclidean space from images, where Euclidean distances correspond to a similarity metric. To make learning more effective and efficient, hard sample mining is usually employed, with samples identified through computing the Euclidean feature distance. However, the global Euclidean distance cannot faithfully characterize the true feature similarity in a complex visual feature space, where the intraclass distance in a high-density region may be larger than the interclass distance in low-density regions. In this paper, we introduce a Position-Dependent Deep Metric (PDDM) unit, which is capable of learning a similarity metric adaptive to local feature structure. The metric can be used to select genuinely hard samples in a local neighborhood to guide the deep embedding learning in an online and robust manner. The new layer is appealing in that it can be plugged into any convolutional network and is trained end-to-end. Our local similarity-aware feature embedding not only demonstrates faster convergence and boosted performance on two complex image retrieval datasets, but its large-margin nature also leads to superior generalization results under the large and open set scenarios of transfer learning and zero-shot learning on ImageNet 2010 and ImageNet-10K datasets.", "bibtex": "@inproceedings{NIPS2016_556f3919,\n author = {Huang, Chen and Loy, Chen Change and Tang, Xiaoou},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Local Similarity-Aware Deep Feature Embedding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/556f391937dfd4398cbac35e050a2177-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/556f391937dfd4398cbac35e050a2177-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/556f391937dfd4398cbac35e050a2177-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/556f391937dfd4398cbac35e050a2177-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/556f391937dfd4398cbac35e050a2177-Reviews.html", "metareview": "", "pdf_size": 841698, "gs_citation": 206, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12742525123571585658&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Information Engineering, The Chinese University of Hong Kong; Department of Information Engineering, The Chinese University of Hong Kong; Department of Information Engineering, The Chinese University of Hong Kong", "aff_domain": "ie.cuhk.edu.hk;ie.cuhk.edu.hk;ie.cuhk.edu.hk", "email": "ie.cuhk.edu.hk;ie.cuhk.edu.hk;ie.cuhk.edu.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/556f391937dfd4398cbac35e050a2177-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "Department of Information Engineering", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Long-term Causal Effects via Behavioral Game Theory", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7269", "id": "7269", "author_site": "Panagiotis Toulis, David Parkes", "author": "Panagiotis Toulis; David C. Parkes", "abstract": "Planned experiments are the gold standard in reliably comparing the causal effect of switching from a baseline policy to a new policy. One critical shortcoming of classical experimental methods, however, is that they typically do not take into account the dynamic nature of response to policy changes. For instance, in an experiment where we seek to understand the effects of a new ad pricing policy on auction revenue, agents may adapt their bidding in response to the experimental pricing changes. Thus, causal effects of the new pricing policy after such an adaptation period, the {\\em long-term causal effects}, are not captured by the classical methodology even though they clearly are more indicative of the value of the new policy. Here, we formalize a framework to define and estimate long-term causal effects of policy changes in multiagent economies. Central to our approach is behavioral game theory, which we leverage to formulate the ignorability assumptions that are necessary for causal inference. Under such assumptions, we estimate long-term causal effects through a latent space approach, where a behavioral model of how agents act conditional on their latent behaviors is combined with a temporal model of how behaviors evolve over time.", "bibtex": "@inproceedings{NIPS2016_af473271,\n author = {Toulis, Panagiotis and Parkes, David C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Long-term Causal Effects via Behavioral Game Theory},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/af4732711661056eadbf798ba191272a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/af4732711661056eadbf798ba191272a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/af4732711661056eadbf798ba191272a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/af4732711661056eadbf798ba191272a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/af4732711661056eadbf798ba191272a-Reviews.html", "metareview": "", "pdf_size": 379651, "gs_citation": 14, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9182836422926169369&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Econometrics & Statistics, Booth School, University of Chicago; Department of Computer Science, Harvard University", "aff_domain": "chicagobooth.edu;eecs.harvard.edu", "email": "chicagobooth.edu;eecs.harvard.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/af4732711661056eadbf798ba191272a-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of Chicago;Harvard University", "aff_unique_dep": "Booth School of Business;Department of Computer Science", "aff_unique_url": "https://www.chicagobooth.edu;https://www.harvard.edu", "aff_unique_abbr": "Chicago Booth;Harvard", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Chicago;Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Low-Rank Regression with Tensor Responses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7335", "id": "7335", "author_site": "Guillaume Rabusseau, Hachem Kadri", "author": "Guillaume Rabusseau; Hachem Kadri", "abstract": "This paper proposes an efficient algorithm (HOLRR) to handle regression tasks where the outputs have a tensor structure. We formulate the regression problem as the minimization of a least squares criterion under a multilinear rank constraint, a difficult non-convex problem. HOLRR efficiently computes an approximate solution to this problem, with solid theoretical guarantees. A kernel extension is also presented. Experiments on synthetic and real data show that HOLRR computes accurate solutions while being computationally very competitive.", "bibtex": "@inproceedings{NIPS2016_3806734b,\n author = {Rabusseau, Guillaume and Kadri, Hachem},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Low-Rank Regression with Tensor Responses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Reviews.html", "metareview": "", "pdf_size": 1664776, "gs_citation": 94, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9079822056797495809&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Aix Marseille Univ, CNRS, LIF, Marseille, France; Aix Marseille Univ, CNRS, LIF, Marseille, France", "aff_domain": "lif.univ-mrs.fr;lif.univ-mrs.fr", "email": "lif.univ-mrs.fr;lif.univ-mrs.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3806734b256c27e41ec2c6bffa26d9e7-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Aix Marseille University", "aff_unique_dep": "CNRS, LIF", "aff_unique_url": "https://www.univ-amu.fr", "aff_unique_abbr": "AMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Marseille", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7107", "id": "7107", "author_site": "Tolga Bolukbasi, Kai-Wei Chang, James Y Zou, Venkatesh Saligrama, Adam T Kalai", "author": "Tolga Bolukbasi; Kai-Wei Chang; James Y Zou; Venkatesh Saligrama; Adam T Kalai", "abstract": "The blind application of machine learning runs the risk of amplifying biases present in data. Such a danger arises with word embeddings, a popular framework for representing text data as vectors that has been used in many machine learning and natural language processing tasks. We show that even word embeddings trained on Google News articles exhibit female/male gender stereotypes to a disturbing extent. This raises concerns because their widespread use, as we describe, often tends to amplify these biases. Geometrically, gender bias is first shown to be captured by a direction in the word embedding. Second, gender neutral words are shown to be linearly separable from gender definition words in the word embedding. Using these properties, we provide a methodology for modifying an embedding to remove gender stereotypes, such as the association between the words receptionist and female, while maintaining desired associations such as between the words queen and female. Using crowd-worker evaluation as well as standard benchmarks, we empirically demonstrate that our algorithms significantly reduce gender bias in embeddings while preserving its useful properties such as the ability to cluster related concepts and to solve analogy tasks. The resulting embeddings can be used in applications without amplifying gender bias.", "bibtex": "@inproceedings{NIPS2016_a486cd07,\n author = {Bolukbasi, Tolga and Chang, Kai-Wei and Zou, James Y and Saligrama, Venkatesh and Kalai, Adam T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Man is to Computer Programmer as Woman is to Homemaker? Debiasing Word Embeddings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Reviews.html", "metareview": "", "pdf_size": 614644, "gs_citation": 4470, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1143892262062010100&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Boston University; Microsoft Research New England; Microsoft Research New England; Boston University + Microsoft Research New England; Microsoft Research New England", "aff_domain": "bu.edu;kwchang.net;gmail.com;bu.edu;microsoft.com", "email": "bu.edu;kwchang.net;gmail.com;bu.edu;microsoft.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a486cd07e4ac3d270571622f4f316ec5-Abstract.html", "aff_unique_index": "0;1;1;0+1;1", "aff_unique_norm": "Boston University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.bu.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-england", "aff_unique_abbr": "BU;MSR NE", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";New England", "aff_country_unique_index": "0;0;0;0+0;0", "aff_country_unique": "United States" }, { "title": "Mapping Estimation for Discrete Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7170", "id": "7170", "author_site": "Micha\u00ebl Perrot, Nicolas Courty, R\u00e9mi Flamary, Amaury Habrard", "author": "Micha\u00ebl Perrot; Nicolas Courty; R\u00e9mi Flamary; Amaury Habrard", "abstract": "We are interested in the computation of the transport map of an Optimal Transport problem. Most computational approaches to Optimal Transport use the Kantorovich relaxation of the problem to learn a probabilistic coupling $\\gamma$ but do not address the problem of learning the underlying transport map $T$ linked to the original Monge problem. Consequently, this limits the use of such methods in contexts where out-of-sample computations are mandatory. In this paper we propose a new way to jointly learn the coupling and an approximation of the transport map. We use a jointly convex formulation which can be efficiently optimized. Additionally, jointly learning the coupling and the transport map allows us to smooth the result of the Optimal Transport and to generalize it to out-of-sample examples. Empirically, we show the interest and the relevance of our method in two tasks: domain adaptation and image editing.", "bibtex": "@inproceedings{NIPS2016_26f5bd4a,\n author = {Perrot, Micha\\\"{e}l and Courty, Nicolas and Flamary, R\\'{e}mi and Habrard, Amaury},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mapping Estimation for Discrete Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/26f5bd4aa64fdadf96152ca6e6408068-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/26f5bd4aa64fdadf96152ca6e6408068-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/26f5bd4aa64fdadf96152ca6e6408068-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/26f5bd4aa64fdadf96152ca6e6408068-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/26f5bd4aa64fdadf96152ca6e6408068-Reviews.html", "metareview": "", "pdf_size": 6569925, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9141487419325959381&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Univ Lyon, UJM-Saint-Etienne, CNRS, Lab. Hubert Curien UMR 5516, F-42023; Universit\u00e9 de Bretagne Sud, IRISA, UMR 6074, CNRS; Universit\u00e9 C\u00f4te d\u2019Azur, Lagrange, UMR 7293, CNRS, OCA; Univ Lyon, UJM-Saint-Etienne, CNRS, Lab. Hubert Curien UMR 5516, F-42023", "aff_domain": "univ-st-etienne.fr;univ-ubs.fr;unice.fr;univ-st-etienne.fr", "email": "univ-st-etienne.fr;univ-ubs.fr;unice.fr;univ-st-etienne.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/26f5bd4aa64fdadf96152ca6e6408068-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Universit\u00e9 Lyon;Universit\u00e9 de Bretagne Sud;Universit\u00e9 C\u00f4te d\u2019Azur", "aff_unique_dep": "Lab. Hubert Curien UMR 5516;IRISA, UMR 6074, CNRS;Lagrange, UMR 7293", "aff_unique_url": "https://www.universite-lyon.fr;https://www.univ-ubs.fr;https://www.univ-cotedazur.fr", "aff_unique_abbr": "Univ Lyon;UBS;UCA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Matching Networks for One Shot Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6999", "id": "6999", "author_site": "Oriol Vinyals, Charles Blundell, Timothy Lillicrap, koray kavukcuoglu, Daan Wierstra", "author": "Oriol Vinyals; Charles Blundell; Timothy Lillicrap; koray kavukcuoglu; Daan Wierstra", "abstract": "Learning from a few examples remains a key challenge in machine learning. Despite recent advances in important domains such as vision and language, the standard supervised deep learning paradigm does not offer a satisfactory solution for learning new concepts rapidly from little data. In this work, we employ ideas from metric learning based on deep neural features and from recent advances that augment neural networks with external memories. Our framework learns a network that maps a small labelled support set and an unlabelled example to its label, obviating the need for fine-tuning to adapt to new class types. We then define one-shot learning problems on vision (using Omniglot, ImageNet) and language tasks. Our algorithm improves one-shot accuracy on ImageNet from 82.2% to 87.8% and from 88% to 95% on Omniglot, compared to competing approaches. 
We also demonstrate the usefulness of the same model on language modeling by introducing a one-shot task on the Penn Treebank.", "bibtex": "@inproceedings{NIPS2016_90e13578,\n author = {Vinyals, Oriol and Blundell, Charles and Lillicrap, Timothy and kavukcuoglu, koray and Wierstra, Daan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Matching Networks for One Shot Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/90e1357833654983612fb05e3ec9148c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/90e1357833654983612fb05e3ec9148c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/90e1357833654983612fb05e3ec9148c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/90e1357833654983612fb05e3ec9148c-Reviews.html", "metareview": "", "pdf_size": 2890623, "gs_citation": 9403, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16507194575687684095&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/90e1357833654983612fb05e3ec9148c-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Matrix Completion has No Spurious Local Minimum", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7218", "id": "7218", "author_site": "Rong Ge, Jason Lee, Tengyu Ma", "author": "Rong Ge; Jason Lee; Tengyu Ma", "abstract": "Matrix completion is a basic machine learning problem that has wide applications, especially in collaborative filtering and recommender systems. Simple non-convex optimization algorithms are popular and effective in practice. Despite recent progress in proving various non-convex algorithms converge from a good initial point, it remains unclear why random or arbitrary initialization suffices in practice. We prove that the commonly used non-convex objective function for matrix completion has no spurious local minima --- all local minima must also be global. Therefore, many popular optimization algorithms such as (stochastic) gradient descent can provably solve matrix completion with \\textit{arbitrary} initialization in polynomial time.", "bibtex": "@inproceedings{NIPS2016_7fb8ceb3,\n author = {Ge, Rong and Lee, Jason D and Ma, Tengyu},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Matrix Completion has No Spurious Local Minimum},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7fb8ceb3bd59c7956b1df66729296a4c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7fb8ceb3bd59c7956b1df66729296a4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7fb8ceb3bd59c7956b1df66729296a4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7fb8ceb3bd59c7956b1df66729296a4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7fb8ceb3bd59c7956b1df66729296a4c-Reviews.html", "metareview": "", "pdf_size": 460525, "gs_citation": 767, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14724299708943701856&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Duke University; University of Southern California; Princeton University", "aff_domain": "cs.duke.edu;marshall.usc.edu;cs.princeton.edu", "email": "cs.duke.edu;marshall.usc.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7fb8ceb3bd59c7956b1df66729296a4c-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Duke University;University of Southern California;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duke.edu;https://www.usc.edu;https://www.princeton.edu", "aff_unique_abbr": "Duke;USC;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Maximal Sparsity with Deep Networks?", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6986", "id": "6986", "author_site": "Bo Xin, Yizhou Wang, Wen Gao, David Wipf, Baoyuan Wang", "author": "Bo Xin; Yizhou Wang; Wen Gao; David Wipf; Baoyuan Wang", "abstract": "The iterations of many sparse estimation algorithms are comprised of a fixed linear filter cascaded with a thresholding nonlinearity, which collectively resemble a typical neural network layer. Consequently, a lengthy sequence of algorithm iterations can be viewed as a deep network with shared, hand-crafted layer weights. It is therefore quite natural to examine the degree to which a learned network model might act as a viable surrogate for traditional sparse estimation in domains where ample training data is available. While the possibility of a reduced computational budget is readily apparent when a ceiling is imposed on the number of layers, our work primarily focuses on estimation accuracy. In particular, it is well-known that when a signal dictionary has coherent columns, as quantified by a large RIP constant, then most tractable iterative algorithms are unable to find maximally sparse representations. In contrast, we demonstrate both theoretically and empirically the potential for a trained deep network to recover minimal $\\ell_0$-norm representations in regimes where existing methods fail. 
The resulting system, which can effectively learn novel iterative sparse estimation algorithms, is deployed on a practical photometric stereo estimation problem, where the goal is to remove sparse outliers that can disrupt the estimation of surface normals from a 3D scene.", "bibtex": "@inproceedings{NIPS2016_0d73a250,\n author = {Xin, Bo and Wang, Yizhou and Gao, Wen and Wipf, David and Wang, Baoyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximal Sparsity with Deep Networks?},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0d73a25092e5c1c9769a9f3255caa65a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0d73a25092e5c1c9769a9f3255caa65a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0d73a25092e5c1c9769a9f3255caa65a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0d73a25092e5c1c9769a9f3255caa65a-Reviews.html", "metareview": "", "pdf_size": 339248, "gs_citation": 196, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12470924270851873109&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Peking University + Microsoft Research, Beijing; Peking University; Peking University; Microsoft Research, Redmond; Microsoft Research, Beijing", "aff_domain": "microsoft.com;pku.edu.cn;pku.edu.cn;microsoft.com;microsoft.com", "email": "microsoft.com;pku.edu.cn;pku.edu.cn;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0d73a25092e5c1c9769a9f3255caa65a-Abstract.html", "aff_unique_index": "0+1;0;0;1;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group.aspx?group=beijing", "aff_unique_abbr": "Peking U;MSR", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Beijing;Redmond", "aff_country_unique_index": "0+0;0;0;1;0", "aff_country_unique": "China;United States" }, { "title": "Maximization of Approximately Submodular Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7367", "id": "7367", "author_site": "Thibaut Horel, Yaron Singer", "author": "Thibaut Horel; Yaron Singer", "abstract": "We study the problem of maximizing a function that is approximately submodular under a cardinality constraint. Approximate submodularity implicitly appears in a wide range of applications, since in many cases errors in evaluating a submodular function break submodularity. Say that $F$ is $\\epsilon$-approximately submodular if there exists a submodular function $f$ such that $(1-\\epsilon)f(S) \\leq F(S) \\leq (1+\\epsilon)f(S)$ for all subsets $S$. We are interested in characterizing the query-complexity of maximizing $F$ subject to a cardinality constraint $k$ as a function of the error level $\\epsilon > 0$. We provide both lower and upper bounds: for $\\epsilon > n^{-1/2}$ we show an exponential query-complexity lower bound. In contrast, when $\\epsilon < 1/k$ or under a stronger bounded curvature assumption, we give constant approximation algorithms.", "bibtex": "@inproceedings{NIPS2016_81c8727c,\n author = {Horel, Thibaut and Singer, Yaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. 
Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximization of Approximately Submodular Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/81c8727c62e800be708dbf37c4695dff-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/81c8727c62e800be708dbf37c4695dff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/81c8727c62e800be708dbf37c4695dff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/81c8727c62e800be708dbf37c4695dff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/81c8727c62e800be708dbf37c4695dff-Reviews.html", "metareview": "", "pdf_size": 286975, "gs_citation": 138, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11565414318749968491&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Harvard University; Harvard University", "aff_domain": "seas.harvard.edu;seas.harvard.edu", "email": "seas.harvard.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/81c8727c62e800be708dbf37c4695dff-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Maximizing Influence in an Ising Network: A Mean-Field Optimal Solution", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8514", "id": "8514", "author_site": "Christopher W Lynn, Daniel Lee", "author": "Christopher Lynn; Daniel D Lee", "abstract": "Influence maximization in social networks has typically been studied in the context of contagion models and irreversible processes. In this paper, we consider an alternate model that treats individual opinions as spins in an Ising system at dynamic equilibrium. We formalize the \\textit{Ising influence maximization} problem, which has a natural physical interpretation as maximizing the magnetization given a budget of external magnetic field. Under the mean-field (MF) approximation, we present a gradient ascent algorithm that uses the susceptibility to efficiently calculate local maxima of the magnetization, and we develop a number of sufficient conditions for when the MF magnetization is concave and our algorithm converges to a global optimum. We apply our algorithm on random and real-world networks, demonstrating, remarkably, that the MF optimal external fields (i.e., the external fields which maximize the MF magnetization) exhibit a phase transition from focusing on high-degree individuals at high temperatures to focusing on low-degree individuals at low temperatures. We also establish a number of novel results about the structure of steady-states in the ferromagnetic MF Ising model on general graphs, which are of independent interest.", "bibtex": "@inproceedings{NIPS2016_2df45244,\n author = {Lynn, Christopher and Lee, Daniel D},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Maximizing Influence in an Ising Network: A Mean-Field Optimal Solution},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2df45244f09369e16ea3f9117ca45157-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2df45244f09369e16ea3f9117ca45157-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2df45244f09369e16ea3f9117ca45157-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2df45244f09369e16ea3f9117ca45157-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2df45244f09369e16ea3f9117ca45157-Reviews.html", "metareview": "", "pdf_size": 953993, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2163694463647610176&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Physics and Astronomy, University of Pennsylvania; Department of Electrical and Systems Engineering, University of Pennsylvania", "aff_domain": "sas.upenn.edu;seas.upenn.edu", "email": "sas.upenn.edu;seas.upenn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2df45244f09369e16ea3f9117ca45157-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "Department of Physics and Astronomy", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Measuring Neural Net Robustness with Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6893", "id": "6893", "author_site": "Osbert Bastani, Yani Ioannou, Leonidas Lampropoulos, Dimitrios Vytiniotis, Aditya Nori, Antonio Criminisi", "author": "Osbert Bastani; Yani Ioannou; Leonidas Lampropoulos; Dimitrios Vytiniotis; Aditya Nori; Antonio Criminisi", "abstract": "Despite having high accuracy, neural nets have been shown to be susceptible to adversarial examples, where a small perturbation to an input can cause it to become mislabeled. We propose metrics for measuring the robustness of a neural net and devise a novel algorithm for approximating these metrics based on an encoding of robustness as a linear program. We show how our metrics can be used to evaluate the robustness of deep neural nets with experiments on the MNIST and CIFAR-10 datasets. Our algorithm generates more informative estimates of robustness metrics compared to estimates based on existing algorithms. Furthermore, we show how existing approaches to improving robustness \u201coverfit\u201d to adversarial examples generated using a specific algorithm. Finally, we show that our techniques can be used to additionally improve neural net robustness both according to the metrics that we propose and according to previously proposed metrics.", "bibtex": "@inproceedings{NIPS2016_980ecd05,\n author = {Bastani, Osbert and Ioannou, Yani and Lampropoulos, Leonidas and Vytiniotis, Dimitrios and Nori, Aditya and Criminisi, Antonio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Measuring Neural Net Robustness with Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/980ecd059122ce2e50136bda65c25e07-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/980ecd059122ce2e50136bda65c25e07-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/980ecd059122ce2e50136bda65c25e07-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/980ecd059122ce2e50136bda65c25e07-Reviews.html", "metareview": "", "pdf_size": 1131693, "gs_citation": 554, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16991046340081083132&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 18, "aff": "Stanford University; University of Cambridge; University of Pennsylvania; Microsoft Research; Microsoft Research; Microsoft Research", "aff_domain": "cs.stanford.edu;cam.ac.uk;seas.upenn.edu;microsoft.com;microsoft.com;microsoft.com", "email": "cs.stanford.edu;cam.ac.uk;seas.upenn.edu;microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/980ecd059122ce2e50136bda65c25e07-Abstract.html", "aff_unique_index": "0;1;2;3;3;3", "aff_unique_norm": "Stanford University;University of Cambridge;University of Pennsylvania;Microsoft", "aff_unique_dep": ";;;Microsoft Research", "aff_unique_url": "https://www.stanford.edu;https://www.cam.ac.uk;https://www.upenn.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Stanford;Cambridge;UPenn;MSR", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Cambridge;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Measuring the reliability of MCMC inference with bidirectional Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7388", "id": "7388", "author_site": "Roger Grosse, Siddharth Ancha, Daniel Roy", "author": "Roger B Grosse; Siddharth Ancha; Daniel M. Roy", "abstract": "Markov chain Monte Carlo (MCMC) is one of the main workhorses of probabilistic inference, but it is notoriously hard to measure the quality of approximate posterior samples. This challenge is particularly salient in black box inference methods, which can hide details and obscure inference failures. In this work, we extend the recently introduced bidirectional Monte Carlo technique to evaluate MCMC-based posterior inference algorithms. By running annealed importance sampling (AIS) chains both from prior to posterior and vice versa on simulated data, we upper bound in expectation the symmetrized KL divergence between the true posterior distribution and the distribution of approximate samples. We integrate our method into two probabilistic programming languages, WebPPL and Stan, and validate it on several models and datasets. As an example of how our method can be used to guide the design of inference algorithms, we apply it to study the effectiveness of different model representations in WebPPL and Stan.", "bibtex": "@inproceedings{NIPS2016_0e9fa1f3,\n author = {Grosse, Roger B and Ancha, Siddharth and Roy, Daniel M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Measuring the reliability of MCMC inference with bidirectional Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0e9fa1f3e9e66792401a6972d477dcc3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0e9fa1f3e9e66792401a6972d477dcc3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0e9fa1f3e9e66792401a6972d477dcc3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0e9fa1f3e9e66792401a6972d477dcc3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0e9fa1f3e9e66792401a6972d477dcc3-Reviews.html", "metareview": "", "pdf_size": 1441164, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18302068653305101371&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, University of Toronto; Department of Computer Science, University of Toronto; Department of Statistics, University of Toronto", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0e9fa1f3e9e66792401a6972d477dcc3-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Toronto", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Memory-Efficient Backpropagation Through Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7097", "id": "7097", "author_site": "Audrunas Gruslys, Remi Munos, Ivo Danihelka, Marc Lanctot, Alex Graves", "author": "Audrunas Gruslys; Remi Munos; Ivo Danihelka; Marc Lanctot; Alex Graves", "abstract": "We propose a novel approach to reduce memory consumption of the backpropagation through time (BPTT) algorithm when training recurrent neural networks (RNNs). Our approach uses dynamic programming to balance a trade-off between caching of intermediate results and recomputation. The algorithm is capable of tightly fitting within almost any user-set memory budget while finding an optimal execution policy minimizing the computational cost. Computational devices have limited memory capacity, and maximizing computational performance given a fixed memory budget is a practical use case. We provide asymptotic computational upper bounds for various regimes. The algorithm is particularly effective for long sequences. For sequences of length 1000, our algorithm saves 95\\% of memory usage while using only one third more time per iteration than the standard BPTT.", "bibtex": "@inproceedings{NIPS2016_a501bebf,\n author = {Gruslys, Audrunas and Munos, Remi and Danihelka, Ivo and Lanctot, Marc and Graves, Alex},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Memory-Efficient Backpropagation Through Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a501bebf79d570651ff601788ea9d16d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a501bebf79d570651ff601788ea9d16d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a501bebf79d570651ff601788ea9d16d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a501bebf79d570651ff601788ea9d16d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a501bebf79d570651ff601788ea9d16d-Reviews.html", "metareview": "", "pdf_size": 434261, "gs_citation": 252, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4005047433761706878&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a501bebf79d570651ff601788ea9d16d-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "MetaGrad: Multiple Learning Rates in Online Learning", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7220", "id": "7220", "author_site": "Tim van Erven, Wouter Koolen", "author": "Tim van Erven; Wouter M. Koolen", "abstract": "In online convex optimization it is well known that certain subclasses of objective functions are much easier than arbitrary convex functions. We are interested in designing adaptive methods that can automatically get fast rates in as many such subclasses as possible, without any manual tuning. Previous adaptive methods are able to interpolate between strongly convex and general convex functions. We present a new method, MetaGrad, that adapts to a much broader class of functions, including exp-concave and strongly convex functions, but also various types of stochastic and non-stochastic functions without any curvature. For instance, MetaGrad can achieve logarithmic regret on the unregularized hinge loss, even though it has no curvature, if the data come from a favourable probability distribution. MetaGrad's main feature is that it simultaneously considers multiple learning rates. Unlike all previous methods with provable regret guarantees, however, its learning rates are not monotonically decreasing over time and are not tuned based on a theoretically derived bound on the regret. Instead, they are weighted in direct proportion to their empirical performance on the data using a tilted exponential weights master algorithm.", "bibtex": "@inproceedings{NIPS2016_14cfdb59,\n author = {van Erven, Tim and Koolen, Wouter M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MetaGrad: Multiple Learning Rates in Online Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/14cfdb59b5bda1fc245aadae15b1984a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/14cfdb59b5bda1fc245aadae15b1984a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/14cfdb59b5bda1fc245aadae15b1984a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/14cfdb59b5bda1fc245aadae15b1984a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/14cfdb59b5bda1fc245aadae15b1984a-Reviews.html", "metareview": "", "pdf_size": 530971, "gs_citation": 111, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=117997425850462677&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 21, "aff": "Leiden University; Centrum Wiskunde & Informatica", "aff_domain": "timvanerven.nl;cwi.nl", "email": "timvanerven.nl;cwi.nl", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/14cfdb59b5bda1fc245aadae15b1984a-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Leiden University;Centrum Wiskunde & Informatica", "aff_unique_dep": ";", "aff_unique_url": "https://www.leidenuniv.nl;https://www.cwi.nl/", "aff_unique_abbr": "LU;CWI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "title": "Minimax Estimation of Maximum Mean Discrepancy with Radial Kernels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7286", "id": "7286", "author_site": "Ilya Tolstikhin, Bharath Sriperumbudur, Bernhard Sch\u00f6lkopf", "author": "Ilya O Tolstikhin; Bharath K. Sriperumbudur; Bernhard Sch\u00f6lkopf", "abstract": "Maximum Mean Discrepancy (MMD) is a distance on the space of probability measures which has found numerous applications in machine learning and nonparametric testing. This distance is based on the notion of embedding probabilities in a reproducing kernel Hilbert space. In this paper, we present the first known lower bounds for the estimation of MMD based on finite samples. Our lower bounds hold for any radial universal kernel on $\\mathbb{R}^d$ and match the existing upper bounds up to constants that depend only on the properties of the kernel. Using these lower bounds, we establish the minimax rate optimality of the empirical estimator and its $U$-statistic variant, which are usually employed in applications.", "bibtex": "@inproceedings{NIPS2016_5055cbf4,\n author = {Tolstikhin, Ilya O and Sriperumbudur, Bharath K. and Sch\\\"{o}lkopf, Bernhard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimax Estimation of Maximum Mean Discrepancy with Radial Kernels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5055cbf43fac3f7e2336b27310f0b9ef-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5055cbf43fac3f7e2336b27310f0b9ef-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5055cbf43fac3f7e2336b27310f0b9ef-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5055cbf43fac3f7e2336b27310f0b9ef-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5055cbf43fac3f7e2336b27310f0b9ef-Reviews.html", "metareview": "", "pdf_size": 440162, "gs_citation": 166, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6696836250651675537&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Empirical Inference, MPI for Intelligent Systems, T\u00fcbingen 72076, Germany; Department of Statistics, Pennsylvania State University, University Park, PA 16802, USA; Department of Empirical Inference, MPI for Intelligent Systems, T\u00fcbingen 72076, Germany", "aff_domain": "tuebingen.mpg.de;psu.edu;tuebingen.mpg.de", "email": "tuebingen.mpg.de;psu.edu;tuebingen.mpg.de", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5055cbf43fac3f7e2336b27310f0b9ef-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Max Planck Institute for Intelligent Systems;Pennsylvania State University", "aff_unique_dep": "Department of Empirical Inference;Department of Statistics", "aff_unique_url": "https://www.mpituebingen.mpg.de;https://www.psu.edu", "aff_unique_abbr": "MPI-IS;PSU", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "T\u00fcbingen;University Park", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Minimax Optimal Alternating Minimization for Kernel Nonparametric Tensor Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7331", "id": "7331", "author_site": "Taiji Suzuki, Heishiro Kanagawa, Hayato Kobayashi, Nobuyuki Shimizu, Yukihiro Tagami", "author": "Taiji Suzuki; Heishiro Kanagawa; Hayato Kobayashi; Nobuyuki Shimizu; Yukihiro Tagami", "abstract": "We investigate the statistical performance and computational efficiency of the alternating minimization procedure for nonparametric tensor learning. Tensor modeling has been widely used for capturing the higher order relations between multimodal data sources. In addition to a linear model, a nonlinear tensor model has received much attention recently because of its high flexibility. We consider an alternating minimization procedure for a general nonlinear model where the true function consists of components in a reproducing kernel Hilbert space (RKHS). In this paper, we show that the alternating minimization method achieves linear convergence as an optimization algorithm and that the generalization error of the resulting estimator is minimax optimal. We apply our algorithm to some multitask learning problems and show that the method indeed performs favorably.", "bibtex": "@inproceedings{NIPS2016_b4568df2,\n author = {Suzuki, Taiji and Kanagawa, Heishiro and Kobayashi, Hayato and Shimizu, Nobuyuki and Tagami, Yukihiro},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U.
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimax Optimal Alternating Minimization for Kernel Nonparametric Tensor Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b4568df26077653eeadf29596708c94b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b4568df26077653eeadf29596708c94b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b4568df26077653eeadf29596708c94b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b4568df26077653eeadf29596708c94b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b4568df26077653eeadf29596708c94b-Reviews.html", "metareview": "", "pdf_size": 396172, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11771252439193649292&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Mathematical and Computing Science, Tokyo Institute of Technology+PRESTO, Japan Science and Technology Agency+Center for Advanced Integrated Intelligence Research, RIKEN; Department of Mathematical and Computing Science, Tokyo Institute of Technology; Yahoo Japan Corporation; Yahoo Japan Corporation; Yahoo Japan Corporation", "aff_domain": "is.titech.ac.jp;m.titech.ac.jp;yahoo-corp.jp;yahoo-corp.jp;yahoo-corp.jp", "email": "is.titech.ac.jp;m.titech.ac.jp;yahoo-corp.jp;yahoo-corp.jp;yahoo-corp.jp", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b4568df26077653eeadf29596708c94b-Abstract.html", "aff_unique_index": "0+1+2;0;3;3;3", "aff_unique_norm": "Tokyo Institute of Technology;Japan Science and Technology Agency;RIKEN;Yahoo Japan Corporation", "aff_unique_dep": "Department of Mathematical and Computing Science;PRESTO;Center for Advanced Integrated Intelligence Research;", "aff_unique_url": "https://www.titech.ac.jp;https://www.jst.go.jp;https://www.riken.jp;https://www.yahoo.co.jp", "aff_unique_abbr": "Titech;JST;RIKEN;Yahoo Japan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0+0;0;0;0;0", "aff_country_unique": "Japan" }, { "title": "Minimizing Quadratic Functions in Constant Time", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7204", "id": "7204", "author_site": "Kohei Hayashi, Yuichi Yoshida", "author": "Kohei Hayashi; Yuichi Yoshida", "abstract": "A sampling-based optimization method for quadratic functions is proposed. Our method approximately solves the following $n$-dimensional quadratic minimization problem in constant time, which is independent of $n$: $z^* = \\min_{v \\in \\mathbb{R}^n} \\langle v, Av \\rangle + n \\langle v, \\mathrm{diag}(d)v \\rangle + n \\langle b, v \\rangle$, where $A \\in \\mathbb{R}^{n \\times n}$ is a matrix and $d, b \\in \\mathbb{R}^n$ are vectors. Our theoretical analysis specifies the number of samples $k(\\delta, \\epsilon)$ such that the approximated solution $z$ satisfies $|z - z^*| = O(\\epsilon n^2)$ with probability $1-\\delta$. The empirical performance (accuracy and runtime) is confirmed by numerical experiments.", "bibtex": "@inproceedings{NIPS2016_feab05aa,\n author = {Hayashi, Kohei and Yoshida, Yuichi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimizing Quadratic Functions in Constant Time},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/feab05aa91085b7a8012516bc3533958-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/feab05aa91085b7a8012516bc3533958-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/feab05aa91085b7a8012516bc3533958-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/feab05aa91085b7a8012516bc3533958-Reviews.html", "metareview": "", "pdf_size": 326783, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17189609610919177531&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "National Institute of Advanced Industrial Science and Technology; National Institute of Informatics + Preferred Infrastructure, Inc.", "aff_domain": "gmail.com;nii.ac.jp", "email": "gmail.com;nii.ac.jp", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/feab05aa91085b7a8012516bc3533958-Abstract.html", "aff_unique_index": "0;1+2", "aff_unique_norm": "National Institute of Advanced Industrial Science and Technology;National Institute of Informatics;Preferred Infrastructure, Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aist.go.jp;https://www.nii.ac.jp/;", "aff_unique_abbr": "AIST;NII;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Japan;United States" }, { "title": "Minimizing Regret on Reflexive Banach Spaces and Nash Equilibria in Continuous Zero-Sum Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7085", "id": "7085", "author_site": "Maximilian Balandat, Walid Krichene, Claire Tomlin, Alexandre Bayen", "author": "Maximilian Balandat; Walid Krichene; Claire Tomlin; Alexandre Bayen", "abstract": "We study a general adversarial online learning problem, in which we are given a decision set X' in a reflexive Banach space X and a sequence of reward vectors in the dual space of X. At each iteration, we choose an action from X', based on the observed sequence of previous rewards. Our goal is to minimize regret, defined as the gap between the realized reward and the reward of the best fixed action in hindsight. Using results from infinite dimensional convex analysis, we generalize the method of Dual Averaging (or Follow the Regularized Leader) to our setting and obtain upper bounds on the worst-case regret that generalize many previous results. Under the assumption of uniformly continuous rewards, we obtain explicit regret bounds in a setting where the decision set is the set of probability distributions on a compact metric space S. Importantly, we make no convexity assumptions on either the set S or the reward functions. We also prove a general lower bound on the worst-case regret for any online algorithm. We then apply these results to the problem of learning in repeated two-player zero-sum games on compact metric spaces. In doing so, we first prove that if both players play a Hannan-consistent strategy, then with probability 1 the empirical distributions of play weakly converge to the set of Nash equilibria of the game. 
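For intuition about the Dual Averaging scheme invoked above, here is a toy finite special case (assumed for illustration only; the setting above concerns probability distributions on a compact metric space, which this sketch does not capture): with an entropy regularizer over a finite action set, Dual Averaging reduces to exponential weights on cumulative rewards.

```python
import numpy as np

def dual_averaging_entropy(rewards, eta=0.1):
    # Dual Averaging with an entropic regularizer on the simplex:
    # play the Gibbs distribution of the cumulative reward vector.
    n_rounds, n_actions = rewards.shape
    cum = np.zeros(n_actions)
    history = []
    for t in range(n_rounds):
        logits = eta * cum
        p = np.exp(logits - logits.max())  # subtract max for numerical stability
        history.append(p / p.sum())        # distribution played at round t
        cum += rewards[t]                  # full-information feedback
    return np.array(history)
```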
We then show that, under mild assumptions, Dual Averaging on the (infinite-dimensional) space of probability distributions indeed achieves Hannan-consistency.", "bibtex": "@inproceedings{NIPS2016_3def184a,\n author = {Balandat, Maximilian and Krichene, Walid and Tomlin, Claire and Bayen, Alexandre},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Minimizing Regret on Reflexive Banach Spaces and Nash Equilibria in Continuous Zero-Sum Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3def184ad8f4755ff269862ea77393dd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3def184ad8f4755ff269862ea77393dd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3def184ad8f4755ff269862ea77393dd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3def184ad8f4755ff269862ea77393dd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3def184ad8f4755ff269862ea77393dd-Reviews.html", "metareview": "", "pdf_size": 760843, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14743778640247753453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Electrical Engineering and Computer Sciences, UC Berkeley; Electrical Engineering and Computer Sciences, UC Berkeley; Electrical Engineering and Computer Sciences, UC Berkeley; Electrical Engineering and Computer Sciences, UC Berkeley", "aff_domain": "eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;berkeley.edu", "email": "eecs.berkeley.edu;eecs.berkeley.edu;eecs.berkeley.edu;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3def184ad8f4755ff269862ea77393dd-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Mistake Bounds for Binary Matrix Completion", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8511", "id": "8511", "author_site": "Mark Herbster, Stephen Pasteris, Massimiliano Pontil", "author": "Mark Herbster; Stephen Pasteris; Massimiliano Pontil", "abstract": "We study the problem of completing a binary matrix in an online learning setting. On each trial we predict a matrix entry and then receive the true entry. We propose a Matrix Exponentiated Gradient algorithm [1] to solve this problem. We provide a mistake bound for the algorithm, which scales with the margin complexity [2, 3] of the underlying matrix. The bound suggests an interpretation where each row of the matrix is a prediction task over a finite set of objects, the columns. Using this we show that the algorithm makes a number of mistakes which is comparable up to a logarithmic factor to the number of mistakes made by the Kernel Perceptron with an optimal kernel in hindsight. 
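For a rough sense of the multiplicative matrix update involved, here is a schematic Matrix Exponentiated Gradient step (a generic sketch under assumed symmetric loss gradients `G`; the prediction rule, gradients, and normalization of the actual algorithm are not reproduced here):

```python
import numpy as np
from scipy.linalg import expm, logm

def meg_update(W, G, eta=0.1):
    # Matrix Exponentiated Gradient: a multiplicative update on symmetric
    # positive-definite matrices, renormalized to unit trace.
    M = expm(logm(W) - eta * G)
    return M / np.trace(M)

W = np.eye(4) / 4.0                 # uniform (maximum-entropy) initialization
G = np.diag([1.0, 0.0, -1.0, 0.0])  # assumed symmetric loss gradient
W = meg_update(W, G)
```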
We discuss applications of the algorithm to predicting as well as the best biclustering, and to the problem of predicting the labeling of a graph without knowing the graph in advance.", "bibtex": "@inproceedings{NIPS2016_d46e1fcf,\n author = {Herbster, Mark and Pasteris, Stephen and Pontil, Massimiliano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mistake Bounds for Binary Matrix Completion},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d46e1fcf4c07ce4a69ee07e4134bcef1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d46e1fcf4c07ce4a69ee07e4134bcef1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d46e1fcf4c07ce4a69ee07e4134bcef1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d46e1fcf4c07ce4a69ee07e4134bcef1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d46e1fcf4c07ce4a69ee07e4134bcef1-Reviews.html", "metareview": "", "pdf_size": 458488, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13034801023085652291&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "University College London, Department of Computer Science, London WC1E 6BT, UK; University College London, Department of Computer Science, London WC1E 6BT, UK; Istituto Italiano di Tecnologia, 16163 Genoa, Italy + University College London, Department of Computer Science, London WC1E 6BT, UK", "aff_domain": "cs.ucl.ac.uk;cs.ucl.ac.uk;cs.ucl.ac.uk", "email": "cs.ucl.ac.uk;cs.ucl.ac.uk;cs.ucl.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d46e1fcf4c07ce4a69ee07e4134bcef1-Abstract.html", "aff_unique_index": "0;0;1+0", "aff_unique_norm": "University College London;Istituto Italiano di Tecnologia", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.iit.it", "aff_unique_abbr": "UCL;IIT", "aff_campus_unique_index": "0;0;1+0", "aff_campus_unique": "London;Genoa", "aff_country_unique_index": "0;0;1+0", "aff_country_unique": "United Kingdom;Italy" }, { "title": "Mixed Linear Regression with Multiple Components", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7134", "id": "7134", "author_site": "Kai Zhong, Prateek Jain, Inderjit Dhillon", "author": "Kai Zhong; Prateek Jain; Inderjit S Dhillon", "abstract": "In this paper, we study the mixed linear regression (MLR) problem, where the goal is to recover multiple underlying linear models from their unlabeled linear measurements. We propose a non-convex objective function which we show is {\\em locally strongly convex} in the neighborhood of the ground truth. We use a tensor method for initialization so that the initial models are in the local strong convexity region. We then employ general convex optimization algorithms to minimize the objective function. To the best of our knowledge, our approach provides the first exact recovery guarantees for the MLR problem with $K \\geq 2$ components. Moreover, our method has near-optimal computational complexity $\\tilde O (Nd)$ as well as near-optimal sample complexity $\\tilde O (d)$ for {\\em constant} $K$. Furthermore, we show that our non-convex formulation can be extended to solving the {\\em subspace clustering} problem as well.
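A bare-bones sketch of the alternating-minimization loop behind this kind of approach (random initialization is used here for brevity; the guarantees described above rely on a tensor-method initialization that this sketch omits):

```python
import numpy as np

def mlr_alt_min(X, y, K=2, n_iters=20, seed=0):
    # Alternate between (1) assigning each sample to the component with the
    # smallest residual and (2) refitting each component by least squares.
    rng = np.random.default_rng(seed)
    d = X.shape[1]
    W = rng.standard_normal((K, d))          # K candidate regressors
    for _ in range(n_iters):
        resid = (X @ W.T - y[:, None]) ** 2  # (n, K) squared residuals
        z = resid.argmin(axis=1)             # hard assignments
        for k in range(K):
            mask = z == k
            if mask.sum() >= d:              # refit only well-posed components
                W[k], *_ = np.linalg.lstsq(X[mask], y[mask], rcond=None)
    return W
```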
In particular, when initialized within a small constant distance of the true subspaces, our method converges to the global optima (and recovers true subspaces) in time {\\em linear} in the number of points. Furthermore, our empirical results indicate that even with random initialization, our approach converges to the global optima in linear time, providing a speed-up of up to two orders of magnitude.", "bibtex": "@inproceedings{NIPS2016_8248a99e,\n author = {Zhong, Kai and Jain, Prateek and Dhillon, Inderjit S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mixed Linear Regression with Multiple Components},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8248a99e81e752cb9b41da3fc43fbe7f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8248a99e81e752cb9b41da3fc43fbe7f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8248a99e81e752cb9b41da3fc43fbe7f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8248a99e81e752cb9b41da3fc43fbe7f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8248a99e81e752cb9b41da3fc43fbe7f-Reviews.html", "metareview": "", "pdf_size": 492320, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3849089410556271746&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "University of Texas at Austin+Microsoft Research India; Microsoft Research India; University of Texas at Austin", "aff_domain": "ices.utexas.edu;microsoft.com;cs.utexas.edu", "email": "ices.utexas.edu;microsoft.com;cs.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8248a99e81e752cb9b41da3fc43fbe7f-Abstract.html", "aff_unique_index": "0+1;1;0", "aff_unique_norm": "University of Texas at Austin;Microsoft", "aff_unique_dep": ";Microsoft Research India", "aff_unique_url": "https://www.utexas.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-india", "aff_unique_abbr": "UT Austin;MSR India", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0+1;1;0", "aff_country_unique": "United States;India" }, { "title": "Mixed vine copulas as joint models of spike counts and local field potentials", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7315", "id": "7315", "author_site": "Arno Onken, Stefano Panzeri", "author": "Arno Onken; Stefano Panzeri", "abstract": "Concurrent measurements of neural activity at multiple scales, sometimes performed with multimodal techniques, are becoming increasingly important for studying brain function. However, statistical methods for their concurrent analysis are currently lacking. Here we introduce such techniques in a framework based on vine copulas with mixed margins to construct multivariate stochastic models. These models can describe detailed mixed interactions between discrete variables such as neural spike counts, and continuous variables such as local field potentials. We propose efficient methods for likelihood calculation, inference, sampling and mutual information estimation within this framework. We test our methods on simulated data and demonstrate applicability on mixed data generated by a biologically realistic neural network.
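As a small illustration of the mixed-margin idea (a single bivariate Gaussian copula only; the framework above builds full vines from such pairwise blocks, and the correlation, spike rate, and LFP scale below are assumed for demonstration):

```python
import numpy as np
from scipy import stats

def sample_mixed_copula(n, rho=0.6, spike_rate=5.0, lfp_sd=2.0, seed=0):
    # Couple a discrete margin (Poisson spike count) with a continuous margin
    # (Gaussian "LFP") through a Gaussian copula with correlation rho.
    rng = np.random.default_rng(seed)
    cov = np.array([[1.0, rho], [rho, 1.0]])
    z = rng.multivariate_normal(np.zeros(2), cov, size=n)
    u = stats.norm.cdf(z)                                 # copula coordinates
    spikes = stats.poisson.ppf(u[:, 0], spike_rate)       # discrete margin
    lfp = stats.norm.ppf(u[:, 1], loc=0.0, scale=lfp_sd)  # continuous margin
    return spikes, lfp
```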
Our methods hold the promise of considerably improving statistical analysis of neural data recorded simultaneously at different scales.", "bibtex": "@inproceedings{NIPS2016_fb89705a,\n author = {Onken, Arno and Panzeri, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mixed vine copulas as joint models of spike counts and local field potentials},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fb89705ae6d743bf1e848c206e16a1d7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fb89705ae6d743bf1e848c206e16a1d7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fb89705ae6d743bf1e848c206e16a1d7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fb89705ae6d743bf1e848c206e16a1d7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fb89705ae6d743bf1e848c206e16a1d7-Reviews.html", "metareview": "", "pdf_size": 419069, "gs_citation": 21, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6254681071836036590&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Istituto Italiano di Tecnologia; Istituto Italiano di Tecnologia", "aff_domain": "iit.it;iit.it", "email": "iit.it;iit.it", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fb89705ae6d743bf1e848c206e16a1d7-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Istituto Italiano di Tecnologia", "aff_unique_dep": "", "aff_unique_url": "https://www.iit.it", "aff_unique_abbr": "IIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Italy" }, { "title": "MoCap-guided Data Augmentation for 3D Pose Estimation in the Wild", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8498", "id": "8498", "author_site": "Gregory Rogez, Cordelia Schmid", "author": "Gregory Rogez; Cordelia Schmid", "abstract": "This paper addresses the problem of 3D human pose estimation in the wild. A significant challenge is the lack of training data, i.e., 2D images of humans annotated with 3D poses. Such data is necessary to train state-of-the-art CNN architectures. Here, we propose a solution to generate a large set of photorealistic synthetic images of humans with 3D pose annotations. We introduce an image-based synthesis engine that artificially augments a dataset of real images with 2D human pose annotations using 3D Motion Capture (MoCap) data. Given a candidate 3D pose, our algorithm selects for each joint an image whose 2D pose locally matches the projected 3D pose. The selected images are then combined to generate a new synthetic image by stitching local image patches in a kinematically constrained manner. The resulting images are used to train an end-to-end CNN for full-body 3D pose estimation. We cluster the training data into a large number of pose classes and tackle pose estimation as a K-way classification problem. Such an approach is viable only with large training sets such as ours. Our method outperforms the state of the art in terms of 3D pose estimation in controlled environments (Human3.6M) and shows promising results for in-the-wild images (LSP).
This demonstrates that CNNs trained on artificial images generalize well to real images.", "bibtex": "@inproceedings{NIPS2016_35464c84,\n author = {Rogez, Gregory and Schmid, Cordelia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {MoCap-guided Data Augmentation for 3D Pose Estimation in the Wild},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/35464c848f410e55a13bb9d78e7fddd0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/35464c848f410e55a13bb9d78e7fddd0-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/35464c848f410e55a13bb9d78e7fddd0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/35464c848f410e55a13bb9d78e7fddd0-Reviews.html", "metareview": "", "pdf_size": 4251002, "gs_citation": 338, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7144903312840770231&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Inria Grenoble Rh\u00f4ne-Alpes, Laboratoire Jean Kuntzmann, France; Inria Grenoble Rh\u00f4ne-Alpes, Laboratoire Jean Kuntzmann, France", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/35464c848f410e55a13bb9d78e7fddd0-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA Grenoble Rh\u00f4ne-Alpes", "aff_unique_dep": "Laboratoire Jean Kuntzmann", "aff_unique_url": "https://www.inria.fr/grenoble", "aff_unique_abbr": "Inria", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Grenoble", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "More Supervision, Less Computation: Statistical-Computational Tradeoffs in Weakly Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6974", "id": "6974", "author_site": "Xinyang Yi, Zhaoran Wang, Zhuoran Yang, Constantine Caramanis, Han Liu", "author": "Xinyang Yi; Zhaoran Wang; Zhuoran Yang; Constantine Caramanis; Han Liu", "abstract": "We consider the weakly supervised binary classification problem where the labels are randomly flipped with probability $1-\\alpha$. Although there exist numerous algorithms for this problem, it remains theoretically unexplored how the statistical accuracies and computational efficiency of these algorithms depend on the degree of supervision, which is quantified by $\\alpha$. In this paper, we characterize the effect of $\\alpha$ by establishing the information-theoretic and computational boundaries, namely, the minimax-optimal statistical accuracy that can be achieved by all algorithms, and polynomial-time algorithms under an oracle computational model. For small $\\alpha$, our result shows a gap between these two boundaries, which represents the computational price of achieving the information-theoretic boundary due to the lack of supervision. Interestingly, we also show that this gap narrows as $\\alpha$ increases. 
In other words, having more supervision, i.e., more correct labels, not only improves the optimal statistical accuracy as expected, but also enhances the computational efficiency for achieving such accuracy.", "bibtex": "@inproceedings{NIPS2016_598920e1,\n author = {Yi, Xinyang and Wang, Zhaoran and Yang, Zhuoran and Caramanis, Constantine and Liu, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {More Supervision, Less Computation: Statistical-Computational Tradeoffs in Weakly Supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/598920e11d1eb2a49501d59fce5ecbb7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/598920e11d1eb2a49501d59fce5ecbb7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/598920e11d1eb2a49501d59fce5ecbb7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/598920e11d1eb2a49501d59fce5ecbb7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/598920e11d1eb2a49501d59fce5ecbb7-Reviews.html", "metareview": "", "pdf_size": 490038, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15149157846025528690&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The University of Texas at Austin; Princeton University; Princeton University; The University of Texas at Austin; Princeton University", "aff_domain": "utexas.edu;princeton.edu;princeton.edu;utexas.edu;princeton.edu", "email": "utexas.edu;princeton.edu;princeton.edu;utexas.edu;princeton.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/598920e11d1eb2a49501d59fce5ecbb7-Abstract.html", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Texas at Austin;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.princeton.edu", "aff_unique_abbr": "UT Austin;Princeton", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multi-armed Bandits: Competing with Optimal Sequences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6990", "id": "6990", "author_site": "Zohar Karnin, Oren Anava", "author": "Zohar S Karnin; Oren Anava", "abstract": "We consider a sequential decision-making problem in the adversarial setting, where regret is measured with respect to the optimal sequence of actions and the feedback adheres to the bandit setting. It is well-known that obtaining sublinear regret in this setting is impossible in general, which raises the question: when can we do better than linear regret? Previous works show that when the environment is guaranteed to vary slowly and furthermore we are given prior knowledge regarding its variation (i.e., a limit on the amount of changes suffered by the environment), then this task is feasible. The caveat, however, is that such prior knowledge is not likely to be available in practice, which causes the obtained regret bounds to be somewhat irrelevant. Our main result is a regret guarantee that scales with the variation parameter of the environment, without requiring any prior knowledge about it whatsoever.
In doing so, we also resolve an open problem posed by [Gur, Zeevi and Besbes, NIPS' 14]. A key component in our result is a statistical test for identifying non-stationarity in a sequence of independent random variables. This test either identifies non-stationarity or upper-bounds the absolute deviation of the corresponding sequence of mean values in terms of its total variation. This test is interesting in its own right and has the potential to prove useful in additional settings.", "bibtex": "@inproceedings{NIPS2016_47d1e990,\n author = {Karnin, Zohar S and Anava, Oren},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-armed Bandits: Competing with Optimal Sequences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/47d1e990583c9c67424d369f3414728e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/47d1e990583c9c67424d369f3414728e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/47d1e990583c9c67424d369f3414728e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/47d1e990583c9c67424d369f3414728e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/47d1e990583c9c67424d369f3414728e-Reviews.html", "metareview": "", "pdf_size": 362853, "gs_citation": 68, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9715370063048685660&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "The Voleon Group, Berkeley, CA; Yahoo! Research, New York, NY", "aff_domain": "voleon.com;yahoo-inc.com", "email": "voleon.com;yahoo-inc.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/47d1e990583c9c67424d369f3414728e-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "University of California, Berkeley;Yahoo! Research", "aff_unique_dep": "The Voleon Group;", "aff_unique_url": "https://www.berkeley.edu;https://research.yahoo.com", "aff_unique_abbr": "UC Berkeley;Yahoo! Res", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Berkeley;New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Multi-step learning and underlying structure in statistical models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7373", "id": "7373", "author": "Maia Fraser", "abstract": "In multi-step learning, where a final learning task is accomplished via a sequence of intermediate learning tasks, the intuition is that successive steps or levels transform the initial data into representations more and more ``suited\" to the final learning task. A related principle arises in transfer-learning where Baxter (2000) proposed a theoretical framework to study how learning multiple tasks transforms the inductive bias of a learner. The most widespread multi-step learning approach is semi-supervised learning with two steps: unsupervised, then supervised. Several authors (Castelli-Cover, 1996; Balcan-Blum, 2005; Niyogi, 2008; Ben-David et al, 2008; Urner et al, 2011) have analyzed SSL, with Balcan-Blum (2005) proposing a version of the PAC learning framework augmented by a ``compatibility function\" to link concept class and unlabeled data distribution.
We propose to analyze SSL and other multi-step learning approaches, much in the spirit of Baxter's framework, by defining a learning problem generatively as a joint statistical model on $X \\times Y$. This determines in a natural way the class of conditional distributions that are possible with each marginal, and amounts to an abstract form of compatibility function. It also allows us to analyze both discrete and non-discrete settings. As a tool for our analysis, we define a notion of $\\gamma$-uniform shattering for statistical models. We use this to give conditions on the marginal and conditional models which imply an advantage for multi-step learning approaches. In particular, we recover a more general version of a result of Poggio et al (2012): under mild hypotheses a multi-step approach which learns features invariant under successive factors of a finite group of invariances has sample complexity requirements that are additive rather than multiplicative in the size of the subgroups.", "bibtex": "@inproceedings{NIPS2016_3cf25597,\n author = {Fraser, Maia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-step learning and underlying structure in statistical models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3cf2559725a9fdfa602ec8c887440f32-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3cf2559725a9fdfa602ec8c887440f32-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3cf2559725a9fdfa602ec8c887440f32-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3cf2559725a9fdfa602ec8c887440f32-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3cf2559725a9fdfa602ec8c887440f32-Reviews.html", "metareview": "", "pdf_size": 531425, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1824625573030738939&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Dept. of Mathematics and Statistics+Brain and Mind Research Institute, University of Ottawa, Ottawa, ON K1N 6N5, Canada", "aff_domain": "uottawa.ca", "email": "uottawa.ca", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3cf2559725a9fdfa602ec8c887440f32-Abstract.html", "aff_unique_index": "0+1", "aff_unique_norm": "University Affiliation;University of Ottawa", "aff_unique_dep": "Department of Mathematics and Statistics;Brain and Mind Research Institute", "aff_unique_url": ";https://www.uottawa.ca", "aff_unique_abbr": ";U Ottawa", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ottawa", "aff_country_unique_index": "1", "aff_country_unique": ";Canada" }, { "title": "Multi-view Anomaly Detection via Robust Probabilistic Latent Variable Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7199", "id": "7199", "author_site": "Tomoharu Iwata, Makoto Yamada", "author": "Tomoharu Iwata; Makoto Yamada", "abstract": "We propose probabilistic latent variable models for multi-view anomaly detection, which is the task of finding instances that have inconsistent views given multi-view data. With the proposed model, all views of a non-anomalous instance are assumed to be generated from a single latent vector.
On the other hand, an anomalous instance is assumed to have multiple latent vectors, and its different views are generated from different latent vectors. By inferring the number of latent vectors used for each instance with Dirichlet process priors, we obtain multi-view anomaly scores. The proposed model can be seen as a robust extension of probabilistic canonical correlation analysis for noisy multi-view data. We present Bayesian inference procedures for the proposed model based on a stochastic EM algorithm. The effectiveness of the proposed model is demonstrated by its performance in detecting multi-view anomalies.", "bibtex": "@inproceedings{NIPS2016_0f966132,\n author = {Iwata, Tomoharu and Yamada, Makoto},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multi-view Anomaly Detection via Robust Probabilistic Latent Variable Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0f96613235062963ccde717b18f97592-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0f96613235062963ccde717b18f97592-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0f96613235062963ccde717b18f97592-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0f96613235062963ccde717b18f97592-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0f96613235062963ccde717b18f97592-Reviews.html", "metareview": "", "pdf_size": 230366, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14582731664612059889&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "NTT Communication Science Laboratories; Kyoto University", "aff_domain": "lab.ntt.co.jp;ieee.org", "email": "lab.ntt.co.jp;ieee.org", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0f96613235062963ccde717b18f97592-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "NTT Communication Science Laboratories;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt-csl.com;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "NTT CSL;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "title": "Multimodal Residual Learning for Visual QA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7176", "id": "7176", "author_site": "Jin-Hwa Kim, Sang-Woo Lee, Donghyun Kwak, Min-Oh Heo, Jeonghee Kim, Jung-Woo Ha, Byoung-Tak Zhang", "author": "Jin-Hwa Kim; Sang-Woo Lee; Donghyun Kwak; Min-Oh Heo; Jeonghee Kim; Jung-Woo Ha; Byoung-Tak Zhang", "abstract": "Deep neural networks continue to advance the state-of-the-art of image recognition tasks with various methods. However, applications of these methods to multimodality remain limited. We present Multimodal Residual Networks (MRN) for the multimodal residual learning of visual question-answering, which extends the idea of deep residual learning. Unlike deep residual learning, MRN effectively learns the joint representation from visual and language information. The main idea is to use element-wise multiplication for the joint residual mappings, exploiting the residual learning of the attentional models in recent studies. 
Based on our study, we explore various alternative models introduced by multimodality. We achieve state-of-the-art results on the Visual QA dataset for both Open-Ended and Multiple-Choice tasks. Moreover, we introduce a novel method to visualize the attention effect of the joint representations for each learning block using the back-propagation algorithm, even though the visual features are collapsed without spatial information.", "bibtex": "@inproceedings{NIPS2016_9b04d152,\n author = {Kim, Jin-Hwa and Lee, Sang-Woo and Kwak, Donghyun and Heo, Min-Oh and Kim, Jeonghee and Ha, Jung-Woo and Zhang, Byoung-Tak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multimodal Residual Learning for Visual QA},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9b04d152845ec0a378394003c96da594-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9b04d152845ec0a378394003c96da594-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9b04d152845ec0a378394003c96da594-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9b04d152845ec0a378394003c96da594-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9b04d152845ec0a378394003c96da594-Reviews.html", "metareview": "", "pdf_size": 1057931, "gs_citation": 398, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15309277317698115764&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Seoul National University; Seoul National University; Seoul National University; Seoul National University; Naver Labs, Naver Corp.; Naver Labs, Naver Corp.; Seoul National University & Surromind Robotics", "aff_domain": "bi.snu.ac.kr;bi.snu.ac.kr;bi.snu.ac.kr;bi.snu.ac.kr;navercorp.com;navercorp.com;bi.snu.ac.kr", "email": "bi.snu.ac.kr;bi.snu.ac.kr;bi.snu.ac.kr;bi.snu.ac.kr;navercorp.com;navercorp.com;bi.snu.ac.kr", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9b04d152845ec0a378394003c96da594-Abstract.html", "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Seoul National University;NAVER Corp.", "aff_unique_dep": ";Naver Labs", "aff_unique_url": "https://www.snu.ac.kr;https://www.naver.com", "aff_unique_abbr": "SNU;Naver", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Multiple-Play Bandits in the Position-Based Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6951", "id": "6951", "author_site": "Paul Lagr\u00e9e, Claire Vernade, Olivier Cappe", "author": "Paul Lagr\u00e9e; Claire Vernade; Olivier Cappe", "abstract": "Sequentially learning to place items in multi-position displays or lists is a task that can be cast into the multiple-play semi-bandit setting. However, a major concern in this context arises when the system cannot decide whether the user feedback for each item is actually exploitable. Indeed, much of the content may have been simply ignored by the user. The present work proposes to exploit available information regarding the display position bias under the so-called Position-based click model (PBM). 
We first discuss how this model differs from the Cascade model and its variants considered in several recent works on multiple-play bandits. We then provide a novel regret lower bound for this model as well as computationally efficient algorithms that display good empirical and theoretical performance.", "bibtex": "@inproceedings{NIPS2016_51ef186e,\n author = {Lagr\\'{e}e, Paul and Vernade, Claire and Cappe, Olivier},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multiple-Play Bandits in the Position-Based Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/51ef186e18dc00c2d31982567235c559-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/51ef186e18dc00c2d31982567235c559-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/51ef186e18dc00c2d31982567235c559-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/51ef186e18dc00c2d31982567235c559-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/51ef186e18dc00c2d31982567235c559-Reviews.html", "metareview": "", "pdf_size": 565504, "gs_citation": 113, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5257290824079264682&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "LRI, Universit\u00e9 Paris Sud+Universit\u00e9 Paris Saclay; LTCI, CNRS, T\u00e9l\u00e9com ParisTech+Universit\u00e9 Paris Saclay; LTCI, CNRS+T\u00e9l\u00e9com ParisTech+Universit\u00e9 Paris Saclay", "aff_domain": "u-psud.fr;enst.fr; ", "email": "u-psud.fr;enst.fr; ", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/51ef186e18dc00c2d31982567235c559-Abstract.html", "aff_unique_index": "0+1;2+1;3+2+1", "aff_unique_norm": "Universit\u00e9 Paris Sud;Universit\u00e9 Paris Saclay;T\u00e9l\u00e9com ParisTech;CNRS", "aff_unique_dep": "LRI;;LTCI;LTCI", "aff_unique_url": "https://www.universite-paris-sud.fr;https://www.universite-paris-saclay.fr;https://www.telecom-paristech.fr;https://www.cnrs.fr", "aff_unique_abbr": ";UPSaclay;T\u00e9l\u00e9com ParisTech;CNRS", "aff_campus_unique_index": ";;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0+0", "aff_country_unique": "France" }, { "title": "Multistage Campaigning in Social Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7379", "id": "7379", "author_site": "Mehrdad Farajtabar, Xiaojing Ye, Sahar Harati, Le Song, Hongyuan Zha", "author": "Mehrdad Farajtabar; Xiaojing Ye; Sahar Harati; Le Song; Hongyuan Zha", "abstract": "We consider control problems for multi-stage campaigning over social networks. The dynamic programming framework is employed to balance the high present reward and large penalty on low future outcome in the presence of extensive uncertainties. In particular, we establish theoretical foundations of optimal campaigning over social networks where the user activities are modeled as a multivariate Hawkes process, and we derive a time dependent linear relation between the intensity of exogenous events and several commonly used objective functions of campaigning. We further develop a convex dynamic programming framework for determining the optimal intervention policy that prescribes the required level of external drive at each stage for the desired campaigning result. 
Experiments on both synthetic data and the real-world MemeTracker dataset show that our algorithm can steer the user activities for optimal campaigning much more accurately than baselines.", "bibtex": "@inproceedings{NIPS2016_b0904096,\n author = {Farajtabar, Mehrdad and Ye, Xiaojing and Harati, Sahar and Song, Le and Zha, Hongyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multistage Campaigning in Social Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b090409688550f3cc93f4ed88ec6cafb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b090409688550f3cc93f4ed88ec6cafb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b090409688550f3cc93f4ed88ec6cafb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b090409688550f3cc93f4ed88ec6cafb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b090409688550f3cc93f4ed88ec6cafb-Reviews.html", "metareview": "", "pdf_size": 258418, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1824216989202200703&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Georgia Institute of Technology\u2217; Georgia State University\u22c4; Emory University\u2020; Georgia Institute of Technology\u2217; Georgia Institute of Technology\u2217", "aff_domain": "gatech.edu;gsu.edu;emory.edu;cc.gatech.edu;cc.gatech.edu", "email": "gatech.edu;gsu.edu;emory.edu;cc.gatech.edu;cc.gatech.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b090409688550f3cc93f4ed88ec6cafb-Abstract.html", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Georgia Institute of Technology;Georgia State University;Emory University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.gatech.edu;https://www.gsu.edu;https://www.emory.edu", "aff_unique_abbr": "Georgia Tech;GSU;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Multivariate tests of association based on univariate tests", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7095", "id": "7095", "author_site": "Ruth Heller, Yair Heller", "author": "Ruth Heller; Yair Heller", "abstract": "For testing two vector random variables for independence, we propose testing whether the distance of one vector from an arbitrary center point is independent from the distance of the other vector from another arbitrary center point by a univariate test. We prove that under minimal assumptions, it is enough to have a consistent univariate independence test on the distances, to guarantee that the power to detect dependence between the random vectors increases to one with sample size. If the univariate test is distribution-free, the multivariate test will also be distribution-free. If we consider multiple center points and aggregate the center-specific univariate tests, the power may be further improved, and the resulting multivariate test may be distribution-free for specific aggregation methods (if the univariate test is distribution-free). We show that certain multivariate tests recently proposed in the literature can be viewed as instances of this general approach. 
Moreover, we show in experiments that novel tests constructed using our approach can have better power and lower computational cost than competing approaches.", "bibtex": "@inproceedings{NIPS2016_7ef605fc,\n author = {Heller, Ruth and Heller, Yair},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Multivariate tests of association based on univariate tests},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7ef605fc8dba5425d6965fbd4c8fbe1f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7ef605fc8dba5425d6965fbd4c8fbe1f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7ef605fc8dba5425d6965fbd4c8fbe1f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7ef605fc8dba5425d6965fbd4c8fbe1f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7ef605fc8dba5425d6965fbd4c8fbe1f-Reviews.html", "metareview": "", "pdf_size": 414033, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15794194160865849357&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Department of Statistics and Operations Research, Tel-Aviv University, Tel-Aviv, Israel 6997801; ", "aff_domain": "gmail.com;gmail.com", "email": "gmail.com;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7ef605fc8dba5425d6965fbd4c8fbe1f-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Tel-Aviv University", "aff_unique_dep": "Department of Statistics and Operations Research", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "0", "aff_campus_unique": "Tel-Aviv", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Mutual information for symmetric rank-one matrix estimation: A proof of the replica formula", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6975", "id": "6975", "author_site": "jean barbier, Mohamad Dia, Nicolas Macris, Florent Krzakala, Thibault Lesieur, Lenka Zdeborov\u00e1", "author": "jean barbier; Mohamad Dia; Nicolas Macris; Florent Krzakala; Thibault Lesieur; Lenka Zdeborov\u00e1", "abstract": "Factorizing low-rank matrices has many applications in machine learning and statistics. For probabilistic models in the Bayes optimal setting, a general expression for the mutual information has been proposed using heuristic statistical physics computations, and proven in a few specific cases. Here, we show how to rigorously prove the conjectured formula for the symmetric rank-one case. This allows us to express the minimal mean-square error and to characterize the detectability phase transitions in a large set of estimation problems ranging from community detection to sparse PCA. We also show that for a large set of parameters, an iterative algorithm called approximate message-passing is Bayes optimal. There exists, however, a gap between what currently known polynomial algorithms can do and what is expected information theoretically. 
Additionally, the proof technique is of interest in its own right and exploits three essential ingredients: the interpolation method introduced in statistical physics by Guerra, the analysis of the approximate message-passing algorithm, and the theory of spatial coupling and threshold saturation in coding. Our approach is generic and applicable to other open problems in statistical estimation where heuristic statistical physics predictions are available.", "bibtex": "@inproceedings{NIPS2016_621bf66d,\n author = {barbier, jean and Dia, Mohamad and Macris, Nicolas and Krzakala, Florent and Lesieur, Thibault and Zdeborov\\'{a}, Lenka},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Mutual information for symmetric rank-one matrix estimation: A proof of the replica formula},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/621bf66ddb7c962aa0d22ac97d69b793-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/621bf66ddb7c962aa0d22ac97d69b793-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/621bf66ddb7c962aa0d22ac97d69b793-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/621bf66ddb7c962aa0d22ac97d69b793-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/621bf66ddb7c962aa0d22ac97d69b793-Reviews.html", "metareview": "", "pdf_size": 421622, "gs_citation": 217, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14649179409682882769&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "Laboratoire de Th\u00e9orie des Communications, Facult\u00e9 Informatique et Communications, Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015, Suisse; Laboratoire de Th\u00e9orie des Communications, Facult\u00e9 Informatique et Communications, Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015, Suisse; Laboratoire de Th\u00e9orie des Communications, Facult\u00e9 Informatique et Communications, Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015, Suisse; Laboratoire de Physique Statistique, CNRS, PSL Universit\u00e9s et Ecole Normale Sup\u00e9rieure, Sorbonne Universit\u00e9s et Universit\u00e9 Pierre & Marie Curie, 75005, Paris, France; Institut de Physique Th\u00e9orique, CNRS, CEA, Universit\u00e9 Paris-Saclay, F-91191, Gif-sur-Yvette, France; Institut de Physique Th\u00e9orique, CNRS, CEA, Universit\u00e9 Paris-Saclay, F-91191, Gif-sur-Yvette, France", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;ens.fr;gmail.com;gmail.com", "email": "epfl.ch;epfl.ch;epfl.ch;ens.fr;gmail.com;gmail.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/621bf66ddb7c962aa0d22ac97d69b793-Abstract.html", "aff_unique_index": "0;0;0;1;2;2", "aff_unique_norm": "EPFL;CNRS;Universit\u00e9 Paris-Saclay", "aff_unique_dep": "Facult\u00e9 Informatique et Communications;Laboratoire de Physique Statistique;Institut de Physique Th\u00e9orique", "aff_unique_url": "https://www.epfl.ch;https://www.cnrs.fr;https://www.universite-paris-saclay.fr", "aff_unique_abbr": "EPFL;CNRS;UPS", "aff_campus_unique_index": "0;0;0;1;2;2", "aff_campus_unique": "Lausanne;Paris;Gif-sur-Yvette", "aff_country_unique_index": "0;0;0;1;1;1", "aff_country_unique": "Switzerland;France" }, { "title": "NESTT: A Nonconvex Primal-Dual Splitting Method for Distributed and Stochastic 
Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7062", "id": "7062", "author_site": "Davood Hajinezhad, Mingyi Hong, Tuo Zhao, Zhaoran Wang", "author": "Davood Hajinezhad; Mingyi Hong; Tuo Zhao; Zhaoran Wang", "abstract": "We study a stochastic and distributed algorithm for nonconvex problems whose objective consists a sum $N$ nonconvex $L_i/N$-smooth functions, plus a nonsmooth regularizer. The proposed NonconvEx primal-dual SpliTTing (NESTT) algorithm splits the problem into $N$ subproblems, and utilizes an augmented Lagrangian based primal-dual scheme to solve it in a distributed and stochastic manner. With a special non-uniform sampling, a version of NESTT achieves $\\epsilon$-stationary solution using $\\mathcal{O}((\\sum_{i=1}^N\\sqrt{L_i/N})^2/\\epsilon)$ gradient evaluations, which can be up to $\\mathcal{O}(N)$ times better than the (proximal) gradient descent methods. It also achieves Q-linear convergence rate for nonconvex $\\ell_1$ penalized quadratic problems with polyhedral constraints. Further, we reveal a fundamental connection between {\\it primal-dual} based methods and a few {\\it primal only} methods such as IAG/SAG/SAGA.", "bibtex": "@inproceedings{NIPS2016_495dabfd,\n author = {Hajinezhad, Davood and Hong, Mingyi and Zhao, Tuo and Wang, Zhaoran},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {NESTT: A Nonconvex Primal-Dual Splitting Method for Distributed and Stochastic Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/495dabfd0ca768a3c3abd672079f48b6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/495dabfd0ca768a3c3abd672079f48b6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/495dabfd0ca768a3c3abd672079f48b6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/495dabfd0ca768a3c3abd672079f48b6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/495dabfd0ca768a3c3abd672079f48b6-Reviews.html", "metareview": "", "pdf_size": 413366, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6309238005906282257&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 13, "aff": "Department of Industrial & Manufacturing Systems Engineering and Department of Electrical & Computer Engineering, Iowa State University; Department of Industrial & Manufacturing Systems Engineering and Department of Electrical & Computer Engineering, Iowa State University; School of Industrial and Systems Engineering, Georgia Institute of Technology; Department of Operations Research, Princeton University", "aff_domain": "iastate.edu;iastate.edu;gatech.edu;princeton.edu", "email": "iastate.edu;iastate.edu;gatech.edu;princeton.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/495dabfd0ca768a3c3abd672079f48b6-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Iowa State University;Georgia Institute of Technology;Princeton University", "aff_unique_dep": "Department of Industrial & Manufacturing Systems Engineering;School of Industrial and Systems Engineering;Department of Operations Research", "aff_unique_url": "https://www.iastate.edu;https://www.gatech.edu;https://www.princeton.edu", "aff_unique_abbr": "ISU;Georgia 
Tech;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";Atlanta", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Natural-Parameter Networks: A Class of Probabilistic Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7268", "id": "7268", "author_site": "Hao Wang, Xingjian SHI, Dit-Yan Yeung", "author": "Hao Wang; Xingjian SHI; Dit-Yan Yeung", "abstract": "Neural networks (NN) have achieved state-of-the-art performance in various applications. Unfortunately in applications where training data is insufficient, they are often prone to overfitting. One effective way to alleviate this problem is to exploit the Bayesian approach by using Bayesian neural networks (BNN). Another shortcoming of NN is the lack of flexibility to customize different distributions for the weights and neurons according to the data, as is often done in probabilistic graphical models. To address these problems, we propose a class of probabilistic neural networks, dubbed natural-parameter networks (NPN), as a novel and lightweight Bayesian treatment of NN. NPN allows the usage of arbitrary exponential-family distributions to model the weights and neurons. Different from traditional NN and BNN, NPN takes distributions as input and goes through layers of transformation before producing distributions to match the target output distributions. As a Bayesian treatment, efficient backpropagation (BP) is performed to learn the natural parameters for the distributions over both the weights and neurons. The output distributions of each layer, as byproducts, may be used as second-order representations for the associated tasks such as link prediction. Experiments on real-world datasets show that NPN can achieve state-of-the-art performance.", "bibtex": "@inproceedings{NIPS2016_fe9fc289,\n author = {Wang, Hao and SHI, Xingjian and Yeung, Dit-Yan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Natural-Parameter Networks: A Class of Probabilistic Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fe9fc289c3ff0af142b6d3bead98a923-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fe9fc289c3ff0af142b6d3bead98a923-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fe9fc289c3ff0af142b6d3bead98a923-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fe9fc289c3ff0af142b6d3bead98a923-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fe9fc289c3ff0af142b6d3bead98a923-Reviews.html", "metareview": "", "pdf_size": 543869, "gs_citation": 99, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18428071103635932773&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "Hong Kong University of Science and Technology; Hong Kong University of Science and Technology; Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;cse.ust.hk;cse.ust.hk", "email": "cse.ust.hk;cse.ust.hk;cse.ust.hk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fe9fc289c3ff0af142b6d3bead98a923-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "title": "Near-Optimal Smoothing of Structured Conditional Probability Matrices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7032", "id": "7032", "author_site": "Moein Falahatgar, Mesrob Ohannessian, Alon Orlitsky", "author": "Moein Falahatgar; Mesrob I Ohannessian; Alon Orlitsky", "abstract": "Utilizing the structure of a probabilistic model can significantly increase its learning speed. Motivated by several recent applications, in particular bigram models in language processing, we consider learning low-rank conditional probability matrices under expected KL-risk. This choice makes smoothing, that is the careful handling of low-probability elements, paramount. We derive an iterative algorithm that extends classical non-negative matrix factorization to naturally incorporate additive smoothing and prove that it converges to the stationary points of a penalized empirical risk. We then derive sample-complexity bounds for the global minimizer of the penalized risk and show that it is within a small factor of the optimal sample complexity. This framework generalizes to more sophisticated smoothing techniques, including absolute-discounting.", "bibtex": "@inproceedings{NIPS2016_8bdb5058,\n author = {Falahatgar, Moein and Ohannessian, Mesrob I and Orlitsky, Alon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Near-Optimal Smoothing of Structured Conditional Probability Matrices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8bdb5058376143fa358981954e7626b8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8bdb5058376143fa358981954e7626b8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8bdb5058376143fa358981954e7626b8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8bdb5058376143fa358981954e7626b8-Reviews.html", "metareview": "", "pdf_size": 453065, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3930654327824439916&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of California, San Diego; Toyota Technological Institute at Chicago; University of California, San Diego", "aff_domain": "ucsd.edu;ttic.edu;ucsd.edu", "email": "ucsd.edu;ttic.edu;ucsd.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8bdb5058376143fa358981954e7626b8-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, San Diego;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UCSD;TTI Chicago", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "San Diego;Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Nearly Isometric Embedding by Relaxation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6890", "id": "6890", "author_site": "James McQueen, Marina Meila, Dominique Perrault-Joncas", "author": "James McQueen; Marina Meila; Dominique Joncas", "abstract": "Many manifold learning algorithms aim to create embeddings with low or no distortion (i.e. isometric). If the data has intrinsic dimension d, it is often impossible to obtain an isometric embedding in d dimensions, but possible in s > d dimensions. Yet, most geometry preserving algorithms cannot do the latter. This paper proposes an embedding algorithm that overcomes this problem. The algorithm directly computes, for any data embedding Y, a distortion loss(Y), and iteratively updates Y in order to decrease it. The distortion measure we propose is based on the push-forward Riemannian metric associated with the coordinates Y. The experiments confirm the superiority of our algorithm in obtaining low distortion embeddings.", "bibtex": "@inproceedings{NIPS2016_cf1f78fe,\n author = {McQueen, James and Meila, Marina and Joncas, Dominique},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nearly Isometric Embedding by Relaxation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cf1f78fe923afe05f7597da2be7a3da8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cf1f78fe923afe05f7597da2be7a3da8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/cf1f78fe923afe05f7597da2be7a3da8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cf1f78fe923afe05f7597da2be7a3da8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cf1f78fe923afe05f7597da2be7a3da8-Reviews.html", "metareview": "", "pdf_size": 5237584, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17800890429414874101&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Department of Statistics, University of Washington; Department of Statistics, University of Washington; Google", "aff_domain": "u.washington.edu;stat.washington.edu;gmail.com", "email": "u.washington.edu;stat.washington.edu;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cf1f78fe923afe05f7597da2be7a3da8-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Washington;Google", "aff_unique_dep": "Department of Statistics;Google", "aff_unique_url": "https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "Seattle;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Nested Mini-Batch K-Means", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7089", "id": "7089", "author_site": "James Newling, Fran\u00e7ois Fleuret", "author": "James Newling; Fran\u00e7ois Fleuret", "abstract": "A new algorithm is proposed which accelerates the mini-batch k-means algorithm of Sculley (2010) by using the distance bounding approach of Elkan (2003). We argue that, when incorporating distance bounds into a mini-batch algorithm, already used data should preferentially be reused. To this end we propose using nested mini-batches, whereby data in a mini-batch at iteration t is automatically reused at iteration t+1. Using nested mini-batches presents two difficulties. The first is that unbalanced use of data can bias estimates, which we resolve by ensuring that each data sample contributes exactly once to centroids. The second is in choosing mini-batch sizes, which we address by balancing premature fine-tuning of centroids with redundancy induced slow-down. Experiments show that the resulting nmbatch algorithm is very effective, often arriving within 1\\% of the empirical minimum 100 times earlier than the standard mini-batch algorithm.", "bibtex": "@inproceedings{NIPS2016_8d317bdc,\n author = {Newling, James and Fleuret, Fran\\c{c}ois},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Nested Mini-Batch K-Means},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8d317bdcf4aafcfc22149d77babee96d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8d317bdcf4aafcfc22149d77babee96d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8d317bdcf4aafcfc22149d77babee96d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8d317bdcf4aafcfc22149d77babee96d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8d317bdcf4aafcfc22149d77babee96d-Reviews.html", "metareview": "", "pdf_size": 1724461, "gs_citation": 62, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10537864779373667459&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Idiap Research Institute & EPFL; Idiap Research Institute & EPFL", "aff_domain": "idiap.ch;idiap.ch", "email": "idiap.ch;idiap.ch", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8d317bdcf4aafcfc22149d77babee96d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Idiap Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.idiap.ch", "aff_unique_abbr": "Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "title": "Neural Universal Discrete Denoiser", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7141", "id": "7141", "author_site": "Taesup Moon, Seonwoo Min, Byunghan Lee, Sungroh Yoon", "author": "Taesup Moon; Seonwoo Min; Byunghan Lee; Sungroh Yoon", "abstract": "We present a new framework for applying deep neural networks (DNN) to devise a universal discrete denoiser. Unlike other approaches that utilize supervised learning for denoising, we do not require any additional training data. In this setting, while the ground-truth label, i.e., the clean data, is not available, we devise ``pseudo-labels'' and a novel objective function such that a DNN can be trained in the same way as in supervised learning to become a discrete denoiser. We experimentally show that our resulting algorithm, dubbed Neural DUDE, significantly outperforms the previous state-of-the-art in several applications with a systematic rule for choosing the hyperparameter, which is an attractive feature in practice.", "bibtex": "@inproceedings{NIPS2016_f8363057,\n author = {Moon, Taesup and Min, Seonwoo and Lee, Byunghan and Yoon, Sungroh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neural Universal Discrete Denoiser},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f83630579d055dc5843ae693e7cdafe0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f83630579d055dc5843ae693e7cdafe0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f83630579d055dc5843ae693e7cdafe0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f83630579d055dc5843ae693e7cdafe0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f83630579d055dc5843ae693e7cdafe0-Reviews.html", "metareview": "", "pdf_size": 856427, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14104142343130794154&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "DGIST; Seoul National University; Seoul National University; Seoul National University", "aff_domain": "dgist.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "email": "dgist.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f83630579d055dc5843ae693e7cdafe0-Abstract.html", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Daegu Gyeongbuk Institute of Science and Technology;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.dgist.ac.kr;https://www.snu.ac.kr", "aff_unique_abbr": "DGIST;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "title": "Neurally-Guided Procedural Models: Amortized Inference for Procedural Graphics Programs using Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7021", "id": "7021", "author_site": "Daniel Ritchie, Anna Thomas, Pat Hanrahan, Noah Goodman", "author": "Daniel Ritchie; Anna Thomas; Pat Hanrahan; Noah Goodman", "abstract": "Probabilistic inference algorithms such as Sequential Monte Carlo (SMC) provide powerful tools for constraining procedural models in computer graphics, but they require many samples to produce desirable results. In this paper, we show how to create procedural models which learn how to satisfy constraints. We augment procedural models with neural networks which control how the model makes random choices based on the output it has generated thus far. We call such models neurally-guided procedural models. As a pre-computation, we train these models to maximize the likelihood of example outputs generated via SMC. They are then used as efficient SMC importance samplers, generating high-quality results with very few samples. We evaluate our method on L-system-like models with image-based constraints. Given a desired quality threshold, neurally-guided models can generate satisfactory results up to 10x faster than unguided models.", "bibtex": "@inproceedings{NIPS2016_40008b9a,\n author = {Ritchie, Daniel and Thomas, Anna and Hanrahan, Pat and Goodman, Noah},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neurally-Guided Procedural Models: Amortized Inference for Procedural Graphics Programs using Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/40008b9a5380fcacce3976bf7c08af5b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/40008b9a5380fcacce3976bf7c08af5b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/40008b9a5380fcacce3976bf7c08af5b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/40008b9a5380fcacce3976bf7c08af5b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/40008b9a5380fcacce3976bf7c08af5b-Reviews.html", "metareview": "", "pdf_size": 2194195, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16229807379025132211&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/40008b9a5380fcacce3976bf7c08af5b-Abstract.html" }, { "title": "Neurons Equipped with Intrinsic Plasticity Learn Stimulus Intensity Statistics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6967", "id": "6967", "author_site": "Travis Monk, Cristina Savin, J\u00f6rg L\u00fccke", "author": "Travis Monk; Cristina Savin; J\u00f6rg L\u00fccke", "abstract": "Experience constantly shapes neural circuits through a variety of plasticity mechanisms. While the functional roles of some plasticity mechanisms are well-understood, it remains unclear how changes in neural excitability contribute to learning. Here, we develop a normative interpretation of intrinsic plasticity (IP) as a key component of unsupervised learning. We introduce a novel generative mixture model that accounts for the class-specific statistics of stimulus intensities, and we derive a neural circuit that learns the input classes and their intensities. We analytically show that inference and learning for our generative model can be achieved by a neural circuit with intensity-sensitive neurons equipped with a specific form of IP. Numerical experiments verify our analytical derivations and show robust behavior for artificial and natural stimuli. Our results link IP to non-trivial input statistics, in particular the statistics of stimulus intensities for classes to which a neuron is sensitive. More generally, our work paves the way toward new classification algorithms that are robust to intensity variations.", "bibtex": "@inproceedings{NIPS2016_3b92d18a,\n author = {Monk, Travis and Savin, Cristina and L\\\"{u}cke, J\\\"{o}rg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Neurons Equipped with Intrinsic Plasticity Learn Stimulus Intensity Statistics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3b92d18aa7a6176dd37d372bc2f1eb71-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3b92d18aa7a6176dd37d372bc2f1eb71-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3b92d18aa7a6176dd37d372bc2f1eb71-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3b92d18aa7a6176dd37d372bc2f1eb71-Reviews.html", "metareview": "", "pdf_size": 1178215, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12329005025557210577&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Cluster of Excellence Hearing4all, University of Oldenburg; IST Austria; Cluster of Excellence Hearing4all, University of Oldenburg", "aff_domain": "uol.de;ist.ac.at;uol.de", "email": "uol.de;ist.ac.at;uol.de", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3b92d18aa7a6176dd37d372bc2f1eb71-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Oldenburg;Institute of Science and Technology Austria", "aff_unique_dep": "Cluster of Excellence Hearing4all;", "aff_unique_url": "https://www.uni-oldenburg.de;https://www.ist.ac.at", "aff_unique_abbr": ";IST Austria", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Austria" }, { "title": "New Liftable Classes for First-Order Probabilistic Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7297", "id": "7297", "author_site": "Seyed Mehran Kazemi, Angelika Kimmig, Guy Van den Broeck, David Poole", "author": "Seyed Mehran Kazemi; Angelika Kimmig; Guy Van den Broeck; David Poole", "abstract": "Statistical relational models provide compact encodings of probabilistic dependencies in relational domains, but result in highly intractable graphical models. The goal of lifted inference is to carry out probabilistic inference without needing to reason about each individual separately, by instead treating exchangeable, undistinguished objects as a whole. In this paper, we study the domain recursion inference rule, which, despite its central role in early theoretical results on domain-lifted inference, was later believed to be redundant. We show that this rule is more powerful than expected, and in fact significantly extends the range of models for which lifted inference runs in time polynomial in the number of individuals in the domain. This includes an open problem called S4, the symmetric transitivity model, and a first-order logic encoding of the birthday paradox. 
We further identify new classes S2FO2 and S2RU of domain-liftable theories, which respectively subsume FO2 and recursively unary theories, the largest classes of domain-liftable theories known so far, and show that using domain recursion can achieve exponential speedup even in theories that cannot fully be lifted with the existing set of inference rules.", "bibtex": "@inproceedings{NIPS2016_c88d8d0a,\n author = {Kazemi, Seyed Mehran and Kimmig, Angelika and Van den Broeck, Guy and Poole, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {New Liftable Classes for First-Order Probabilistic Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c88d8d0a6097754525e02c2246d8d27f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c88d8d0a6097754525e02c2246d8d27f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c88d8d0a6097754525e02c2246d8d27f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c88d8d0a6097754525e02c2246d8d27f-Reviews.html", "metareview": "", "pdf_size": 540224, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13496636099506324398&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "The University of British Columbia; KU Leuven; University of California, Los Angeles; The University of British Columbia", "aff_domain": "cs.ubc.ca;cs.kuleuven.be;cs.ucla.edu;cs.ubc.ca", "email": "cs.ubc.ca;cs.kuleuven.be;cs.ucla.edu;cs.ubc.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c88d8d0a6097754525e02c2246d8d27f-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of British Columbia;Katholieke Universiteit Leuven;University of California, Los Angeles", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;https://www.kuleuven.be;https://www.ucla.edu", "aff_unique_abbr": "UBC;KU Leuven;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Canada;Belgium;United States" }, { "title": "Noise-Tolerant Life-Long Matrix Completion via Adaptive Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6940", "id": "6940", "author_site": "Maria-Florina Balcan, Hongyang Zhang", "author": "Maria-Florina F Balcan; Hongyang Zhang", "abstract": "We study the problem of recovering an incomplete $m\\times n$ matrix of rank $r$ with columns arriving online over time. This is known as the problem of life-long matrix completion, and is widely applied in recommendation systems, computer vision, system identification, etc. The challenge is to design provable algorithms tolerant to a large amount of noise, with small sample complexity. In this work, we give algorithms achieving strong guarantees under two realistic noise models. Under bounded deterministic noise, an adversary can add any bounded yet unstructured noise to each column. For this problem, we present an algorithm that returns a matrix with small error, with sample complexity almost as small as the best prior results in the noiseless case. 
For sparse random noise, where the corrupted columns are sparse and drawn randomly, we give an algorithm that exactly recovers a $\\mu_0$-incoherent matrix with probability at least $1-\\delta$ with sample complexity as small as $O(\\mu_0rn\\log(r/\\delta))$. This result advances the state of the art and matches the lower bound in the worst case. We also study the scenario where the hidden matrix lies on a mixture of subspaces and show that the sample complexity can be even smaller. Our proposed algorithms perform well experimentally on both synthetic and real-world datasets.", "bibtex": "@inproceedings{NIPS2016_dc09c97f,\n author = {Balcan, Maria-Florina F and Zhang, Hongyang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Noise-Tolerant Life-Long Matrix Completion via Adaptive Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dc09c97fd73d7a324bdbfe7c79525f64-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dc09c97fd73d7a324bdbfe7c79525f64-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dc09c97fd73d7a324bdbfe7c79525f64-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dc09c97fd73d7a324bdbfe7c79525f64-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dc09c97fd73d7a324bdbfe7c79525f64-Reviews.html", "metareview": "", "pdf_size": 851961, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10004389258133783960&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Machine Learning Department, Carnegie Mellon University, USA; Machine Learning Department, Carnegie Mellon University, USA", "aff_domain": "cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dc09c97fd73d7a324bdbfe7c79525f64-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Machine Learning Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Normalized Spectral Map Synchronization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6977", "id": "6977", "author_site": "Yanyao Shen, Qixing Huang, Nati Srebro, Sujay Sanghavi", "author": "Yanyao Shen; Qixing Huang; Nati Srebro; Sujay Sanghavi", "abstract": "Algorithmic advances in map synchronization are important for solving a wide range of practical problems with possibly large-scale datasets. In this paper, we provide theoretical justifications for spectral techniques for the map synchronization problem, which takes as input a collection of objects and noisy maps estimated between pairs of objects, and outputs clean maps between all pairs of objects. We show that a simple normalized spectral method that projects the blocks of the top eigenvectors of a data matrix to the map space leads to surprisingly good results. With the noise naturally modelled as a random permutation matrix, this algorithm, NormSpecSync, enjoys theoretical guarantees competitive with state-of-the-art convex optimization techniques, yet it is much more efficient. 
We demonstrate the usefulness of our algorithm in a couple of applications, where, among existing methods, it is optimal in both complexity and exactness.", "bibtex": "@inproceedings{NIPS2016_bb03e43f,\n author = {Shen, Yanyao and Huang, Qixing and Srebro, Nati and Sanghavi, Sujay},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Normalized Spectral Map Synchronization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bb03e43ffe34eeb242a2ee4a4f125e56-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bb03e43ffe34eeb242a2ee4a4f125e56-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bb03e43ffe34eeb242a2ee4a4f125e56-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bb03e43ffe34eeb242a2ee4a4f125e56-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bb03e43ffe34eeb242a2ee4a4f125e56-Reviews.html", "metareview": "", "pdf_size": 411000, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8756636322718923135&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "UT Austin; TTI Chicago+UT Austin; TTI Chicago; UT Austin", "aff_domain": "utexas.edu;cs.utexas.edu;ttic.edu;mail.utexas.edu", "email": "utexas.edu;cs.utexas.edu;ttic.edu;mail.utexas.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bb03e43ffe34eeb242a2ee4a4f125e56-Abstract.html", "aff_unique_index": "0;1+0;1;0", "aff_unique_norm": "University of Texas at Austin;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UT Austin;TTI", "aff_campus_unique_index": "0;1+0;1;0", "aff_campus_unique": "Austin;Chicago", "aff_country_unique_index": "0;0+0;0;0", "aff_country_unique": "United States" }, { "title": "Object based Scene Representations using Fisher Scores of Local Subspace Projections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8497", "id": "8497", "author_site": "Mandar D Dixit, Nuno Vasconcelos", "author": "Mandar D Dixit; Nuno Vasconcelos", "abstract": "Several works have shown that deep CNN classifiers can be easily transferred across datasets, e.g. the transfer of a CNN trained to recognize objects on ImageNET to an object detector on Pascal VOC. Less clear, however, is the ability of CNNs to transfer knowledge across tasks. A common example of such transfer is the problem of scene classification, which should leverage localized object detections to recognize holistic visual concepts. While this problem is currently addressed with Fisher vector representations, these are now shown to be ineffective for the high-dimensional and highly non-linear features extracted by modern CNNs. It is argued that this is mostly due to the reliance on a model, the Gaussian mixture of diagonal covariances, which has a very limited ability to capture the second order statistics of CNN features. This problem is addressed by the adoption of a better model, the mixture of factor analyzers (MFA), which approximates the non-linear data manifold by a collection of local subspaces. The Fisher score with respect to the MFA (MFA-FS) is derived and proposed as an image representation for holistic image classifiers. 
Extensive experiments show that the MFA-FS has state of the art performance for object-to-scene transfer and this transfer actually outperforms the training of a scene CNN from a large scene dataset. The two representations are also shown to be complementary, in the sense that their combination outperforms each of the representations by itself. When combined, they produce a state of the art scene classifier.", "bibtex": "@inproceedings{NIPS2016_856fc816,\n author = {Dixit, Mandar D and Vasconcelos, Nuno},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Object based Scene Representations using Fisher Scores of Local Subspace Projections},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/856fc81623da2150ba2210ba1b51d241-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/856fc81623da2150ba2210ba1b51d241-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/856fc81623da2150ba2210ba1b51d241-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/856fc81623da2150ba2210ba1b51d241-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/856fc81623da2150ba2210ba1b51d241-Reviews.html", "metareview": "", "pdf_size": 274004, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17790490550587143116&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Electrical and Computer Engineering, University of California, San Diego; Department of Electrical and Computer Engineering, University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "email": "ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/856fc81623da2150ba2210ba1b51d241-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Observational-Interventional Priors for Dose-Response Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7047", "id": "7047", "author": "Ricardo Silva", "abstract": "Controlled interventions provide the most direct source of information for learning causal effects. In particular, a dose-response curve can be learned by varying the treatment level and observing the corresponding outcomes. However, interventions can be expensive and time-consuming. Observational data, where the treatment is not controlled by a known mechanism, is sometimes available. Under some strong assumptions, observational data allows for the estimation of dose-response curves. Estimating such curves nonparametrically is hard: sample sizes for controlled interventions may be small, while in the observational case a large number of measured confounders may need to be marginalized. In this paper, we introduce a hierarchical Gaussian process prior that constructs a distribution over the dose-response curve by learning from observational data, and reshapes the distribution with a nonparametric affine transform learned from controlled interventions. 
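
[Editor's note] The composition described above, an observational dose-response estimate reshaped by a nonparametric affine transform, can be illustrated with a toy Gaussian-process prior draw. Everything below (the kernel, the scales, the stand-in observational curve) is assumed for illustration and does not reproduce the paper's exact hierarchy.

import numpy as np

def rbf_kernel(x1, x2, lengthscale=0.2, variance=1.0):
    d = x1[:, None] - x2[None, :]
    return variance * np.exp(-0.5 * (d / lengthscale) ** 2)

# One draw from a composed prior: f_int(x) = a(x) * f_obs(x) + b(x),
# where a and b are smooth GP-distributed coefficients.
rng = np.random.default_rng(0)
x = np.linspace(0.0, 1.0, 50)
f_obs = np.sin(2.0 * np.pi * x)                    # stand-in observational curve
L = np.linalg.cholesky(rbf_kernel(x, x) + 1e-8 * np.eye(x.size))
a = 1.0 + 0.1 * (L @ rng.standard_normal(x.size))  # multiplicative warp ~ GP
b = 0.1 * (L @ rng.standard_normal(x.size))        # additive shift ~ GP
f_int = a * f_obs + b                              # prior draw of the interventional curve
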
This function composition from different sources is shown to speed-up learning, which we demonstrate with a thorough sensitivity analysis and an application to modeling the effect of therapy on cognitive skills of premature infants.", "bibtex": "@inproceedings{NIPS2016_aff16212,\n author = {Silva, Ricardo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Observational-Interventional Priors for Dose-Response Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/aff1621254f7c1be92f64550478c56e6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/aff1621254f7c1be92f64550478c56e6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/aff1621254f7c1be92f64550478c56e6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/aff1621254f7c1be92f64550478c56e6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/aff1621254f7c1be92f64550478c56e6-Reviews.html", "metareview": "", "pdf_size": 2277919, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12213566351732257617&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Department of Statistical Science and Centre for Computational Statistics and Machine Learning, University College London", "aff_domain": "stats.ucl.ac.uk", "email": "stats.ucl.ac.uk", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/aff1621254f7c1be92f64550478c56e6-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University College London", "aff_unique_dep": "Department of Statistical Science", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "0", "aff_campus_unique": "London", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "On Explore-Then-Commit strategies", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6978", "id": "6978", "author_site": "Aur\u00e9lien Garivier, Tor Lattimore, Emilie Kaufmann", "author": "Aurelien Garivier; Tor Lattimore; Emilie Kaufmann", "abstract": "We study the problem of minimising regret in two-armed bandit problems with Gaussian rewards. Our objective is to use this simple setting to illustrate that strategies based on an exploration phase (up to a stopping time) followed by exploitation are necessarily suboptimal. The results hold regardless of whether or not the difference in means between the two arms is known. Besides the main message, we also refine existing deviation inequalities, which allow us to design fully sequential strategies with finite-time regret guarantees that are (a) asymptotically optimal as the horizon grows and (b) order-optimal in the minimax sense. Furthermore we provide empirical evidence that the theory also holds in practice and discuss extensions to non-gaussian and multiple-armed case.", "bibtex": "@inproceedings{NIPS2016_ef575e88,\n author = {Garivier, Aurelien and Lattimore, Tor and Kaufmann, Emilie},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Explore-Then-Commit strategies},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ef575e8837d065a1683c022d2077d342-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ef575e8837d065a1683c022d2077d342-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ef575e8837d065a1683c022d2077d342-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ef575e8837d065a1683c022d2077d342-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ef575e8837d065a1683c022d2077d342-Reviews.html", "metareview": "", "pdf_size": 290381, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12902735325960775274&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ef575e8837d065a1683c022d2077d342-Abstract.html" }, { "title": "On Graph Reconstruction via Empirical Risk Minimization: Fast Learning Rates and Scalability", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7048", "id": "7048", "author_site": "Guillaume Papa, Aur\u00e9lien Bellet, Stephan Cl\u00e9men\u00e7on", "author": "Guillaume Papa; Aur\u00e9lien Bellet; Stephan Cl\u00e9men\u00e7on", "abstract": "The problem of predicting connections between a set of data points finds many applications, in systems biology and social network analysis among others. This paper focuses on the \\textit{graph reconstruction} problem, where the prediction rule is obtained by minimizing the average error over all n(n-1)/2 possible pairs of the n nodes of a training graph. Our first contribution is to derive learning rates of order O(log n / n) for this problem, significantly improving upon the slow rates of order O(1/\u221an) established in the seminal work of Biau & Bleakley (2006). Strikingly, these fast rates are universal, in contrast to similar results known for other statistical learning problems (e.g., classification, density level set estimation, ranking, clustering) which require strong assumptions on the distribution of the data. Motivated by applications to large graphs, our second contribution deals with the computational complexity of graph reconstruction. Specifically, we investigate to which extent the learning rates can be preserved when replacing the empirical reconstruction risk by a computationally cheaper Monte-Carlo version, obtained by sampling with replacement B << n\u00b2 pairs of nodes. Finally, we illustrate our theoretical results by numerical experiments on synthetic and real graphs.", "bibtex": "@inproceedings{NIPS2016_a01a0380,\n author = {Papa, Guillaume and Bellet, Aur\\'{e}lien and Cl\\'{e}men\\c{c}on, Stephan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Graph Reconstruction via Empirical Risk Minimization: Fast Learning Rates and Scalability},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a01a0380ca3c61428c26a231f0e49a09-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a01a0380ca3c61428c26a231f0e49a09-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a01a0380ca3c61428c26a231f0e49a09-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a01a0380ca3c61428c26a231f0e49a09-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a01a0380ca3c61428c26a231f0e49a09-Reviews.html", "metareview": "", "pdf_size": 668904, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17214043454509009552&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay; LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay; INRIA", "aff_domain": "telecom-paristech.fr;telecom-paristech.fr;inria.fr", "email": "telecom-paristech.fr;telecom-paristech.fr;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a01a0380ca3c61428c26a231f0e49a09-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "CNRS;INRIA", "aff_unique_dep": "LTCI;", "aff_unique_url": "https://www.cnrs.fr;https://www.inria.fr", "aff_unique_abbr": "CNRS;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "On Mixtures of Markov Chains", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7321", "id": "7321", "author_site": "Rishi Gupta, Ravi Kumar, Sergei Vassilvitskii", "author": "Rishi Gupta; Ravi Kumar; Sergei Vassilvitskii", "abstract": "We study the problem of reconstructing a mixture of Markov chains from the trajectories generated by random walks through the state space. Under mild non-degeneracy conditions, we show that we can uniquely reconstruct the underlying chains by only considering trajectories of length three, which represent triples of states. Our algorithm is spectral in nature, and is easy to implement.", "bibtex": "@inproceedings{NIPS2016_8b570001,\n author = {Gupta, Rishi and Kumar, Ravi and Vassilvitskii, Sergei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Mixtures of Markov Chains},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8b5700012be65c9da25f49408d959ca0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8b5700012be65c9da25f49408d959ca0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8b5700012be65c9da25f49408d959ca0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8b5700012be65c9da25f49408d959ca0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8b5700012be65c9da25f49408d959ca0-Reviews.html", "metareview": "", "pdf_size": 460187, "gs_citation": 28, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11299238952048380619&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Stanford University; Google Research; Google Research", "aff_domain": "cs.stanford.edu;gmail.com;google.com", "email": "cs.stanford.edu;gmail.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8b5700012be65c9da25f49408d959ca0-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.stanford.edu;https://research.google", "aff_unique_abbr": "Stanford;Google Research", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "On Multiplicative Integration with Recurrent Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7129", "id": "7129", "author_site": "Yuhuai Wu, Saizheng Zhang, Ying Zhang, Yoshua Bengio, Russ Salakhutdinov", "author": "Yuhuai Wu; Saizheng Zhang; Ying Zhang; Yoshua Bengio; Ruslan Salakhutdinov", "abstract": "We introduce a general simple structural design called \u201cMultiplicative Integration\u201d (MI) to improve recurrent neural networks (RNNs). MI changes the way of how the information flow gets integrated in the computational building block of an RNN, while introducing almost no extra parameters. The new structure can be easily embedded into many popular RNN models, including LSTMs and GRUs. We empirically analyze its learning behaviour and conduct evaluations on several tasks using different RNN models. Our experimental results demonstrate that Multiplicative Integration can provide a substantial performance boost over many of the existing RNN models.", "bibtex": "@inproceedings{NIPS2016_f69e505b,\n author = {Wu, Yuhuai and Zhang, Saizheng and Zhang, Ying and Bengio, Yoshua and Salakhutdinov, Russ R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Multiplicative Integration with Recurrent Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f69e505b08403ad2298b9f262659929a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f69e505b08403ad2298b9f262659929a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f69e505b08403ad2298b9f262659929a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f69e505b08403ad2298b9f262659929a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f69e505b08403ad2298b9f262659929a-Reviews.html", "metareview": "", "pdf_size": 667803, "gs_citation": 191, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17930548915839972836&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "University of Toronto; MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al + CIFAR; Carnegie Mellon University + CIFAR", "aff_domain": "cs.toronto.edu;umontreal.ca;umontreal.ca;umontreal.ca;cs.cmu.edu", "email": "cs.toronto.edu;umontreal.ca;umontreal.ca;umontreal.ca;cs.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f69e505b08403ad2298b9f262659929a-Abstract.html", "aff_unique_index": "0;1;1;1+2;3+2", "aff_unique_norm": "University of Toronto;Universit\u00e9 de Montr\u00e9al;Canadian Institute for Advanced Research;Carnegie Mellon University", "aff_unique_dep": ";MILA;;", "aff_unique_url": "https://www.utoronto.ca;https://www.umontreal.ca;https://www.cifar.ca;https://www.cmu.edu", "aff_unique_abbr": "U of T;UdeM;CIFAR;CMU", "aff_campus_unique_index": "1;1;1;", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;0;0;0+0;1+0", "aff_country_unique": "Canada;United States" }, { "title": "On Regularizing Rademacher Observation Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6904", "id": "6904", "author": "Richard Nock", "abstract": "It has recently been shown that supervised learning linear classifiers with two of the most popular losses, the logistic and square loss, is equivalent to optimizing an equivalent loss over sufficient statistics about the class: Rademacher observations (rados). It has also been shown that learning over rados brings solutions to two prominent problems for which the state of the art of learning from examples can be comparatively inferior and in fact less convenient: protecting and learning from private examples, learning from distributed datasets without entity resolution. Bis repetita placent: the two proofs of equivalence are different and rely on specific properties of the corresponding losses, so whether these can be unified and generalized inevitably comes to mind. This is our first contribution: we show how they can be fit into the same theory for the equivalence between example and rado losses. As a second contribution, we show that the generalization unveils a surprising new connection to regularized learning, and in particular a sufficient condition under which regularizing the loss over examples is equivalent to regularizing the rados (i.e. the data) in the equivalent rado loss, in such a way that an efficient algorithm for one regularized rado loss may be as efficient when changing the regularizer. 
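
[Editor's note] As background for the rado losses above: a Rademacher observation is commonly presented (in the earlier rado work this paper builds on) as pi_sigma = (1/2) * sum_i (sigma_i + y_i) * x_i for a sign vector sigma, so only examples with sigma_i = y_i contribute. A minimal sampling sketch, with hypothetical names:

import numpy as np

def sample_rados(X, y, n_rados=100, seed=0):
    """Sample Rademacher observations: pi_sigma = 0.5 * sum_i (sigma_i + y_i) * x_i
    with sigma uniform over {-1, +1}^m. X: (m, d); y: (m,) with labels in {-1, +1}.
    Function name and interface are assumptions, not the paper's API."""
    rng = np.random.default_rng(seed)
    m, _ = X.shape
    sigmas = rng.choice([-1.0, 1.0], size=(n_rados, m))
    return 0.5 * (sigmas + y) @ X        # (n_rados, d) matrix of rados
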
This is our third contribution: we give a formal boosting algorithm for the regularized exponential rado-loss which boosts with any of the ridge, lasso, \\slope, l_\\infty, or elastic net regularizers, using the same master routine for all. Because the regularized exponential rado-loss is the equivalent of the regularized logistic loss over examples, we obtain the first efficient proxy to the minimisation of the regularized logistic loss over examples using such a wide spectrum of regularizers. Experiments with readily available code show that regularization significantly improves rado-based learning and that it compares favourably with example-based learning.", "bibtex": "@inproceedings{NIPS2016_9bf31c7f,\n author = {Nock, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Regularizing Rademacher Observation Losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Reviews.html", "metareview": "", "pdf_size": 415780, "gs_citation": 3, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16090092220875544943&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Data61, The Australian National University & The University of Sydney", "aff_domain": "data61.csiro.au", "email": "data61.csiro.au", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9bf31c7ff062936a96d3c8bd1f8f2ff3-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Australian National University", "aff_unique_dep": "", "aff_unique_url": "https://www.anu.edu.au", "aff_unique_abbr": "ANU", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "title": "On Robustness of Kernel Clustering", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8494", "id": "8494", "author_site": "Bowei Yan, Purnamrita Sarkar", "author": "Bowei Yan; Purnamrita Sarkar", "abstract": "Clustering is an important unsupervised learning problem in machine learning and statistics. Among many existing algorithms, kernel k-means has drawn much research attention due to its ability to find non-linear cluster boundaries and its inherent simplicity. There are two main approaches for kernel k-means: SVD of the kernel matrix and convex relaxations. Despite the attention kernel clustering has received from both theoretical and applied quarters, not much is known about the robustness of these methods. In this paper we first introduce a semidefinite programming relaxation for the kernel clustering problem, then prove that under a suitable model specification, both K-SVD and SDP approaches are consistent in the limit, although SDP is strongly consistent, i.e. achieves exact recovery, whereas K-SVD is weakly consistent, i.e. the fraction of misclassified nodes vanishes. 
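
[Editor's note] Of the two kernel k-means routes just mentioned, the K-SVD one is easy to sketch: embed the points via the top-k eigenvectors of the kernel matrix and run k-means on the embedding. This is a hedged sketch; the SDP relaxation and the paper's model conditions are not reproduced here.

import numpy as np
from scipy.cluster.vq import kmeans2

def kernel_ksvd_cluster(K, k):
    """Minimal K-SVD clustering sketch. K: (n, n) symmetric PSD kernel
    matrix; k: number of clusters. Returns cluster labels."""
    _, v = np.linalg.eigh(K)
    U = v[:, -k:]                        # top-k spectral embedding, (n, k)
    _, labels = kmeans2(U, k, minit='++', seed=0)
    return labels
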
Also the error bounds suggest that SDP is more resilient towards outliers, which we also demonstrate with experiments.", "bibtex": "@inproceedings{NIPS2016_b5a1fc20,\n author = {Yan, Bowei and Sarkar, Purnamrita},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Robustness of Kernel Clustering},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b5a1fc2085986034e448d2ccc5bb9703-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b5a1fc2085986034e448d2ccc5bb9703-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b5a1fc2085986034e448d2ccc5bb9703-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b5a1fc2085986034e448d2ccc5bb9703-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b5a1fc2085986034e448d2ccc5bb9703-Reviews.html", "metareview": "", "pdf_size": 331737, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14037992285918983015&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Statistics and Data Sciences, University of Texas at Austin; Department of Statistics and Data Sciences, University of Texas at Austin", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b5a1fc2085986034e448d2ccc5bb9703-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "Department of Statistics and Data Sciences", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "On Valid Optimal Assignment Kernels and Applications to Graph Classification", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7117", "id": "7117", "author_site": "Nils Kriege, Pierre-Louis Giscard, Richard Wilson", "author": "Nils M. Kriege; Pierre-Louis Giscard; Richard Wilson", "abstract": "The success of kernel methods has initiated the design of novel positive semidefinite functions, in particular for structured data. A leading design paradigm for this is the convolution kernel, which decomposes structured objects into their parts and sums over all pairs of parts. Assignment kernels, in contrast, are obtained from an optimal bijection between parts, which can provide a more valid notion of similarity. In general however, optimal assignments yield indefinite functions, which complicates their use in kernel methods. We characterize a class of base kernels used to compare parts that guarantees positive semidefinite optimal assignment kernels. These base kernels give rise to hierarchies from which the optimal assignment kernels are computed in linear time by histogram intersection. We apply these results by developing the Weisfeiler-Lehman optimal assignment kernel for graphs. It provides high classification accuracy on widely-used benchmark data sets improving over the original Weisfeiler-Lehman kernel.", "bibtex": "@inproceedings{NIPS2016_0efe3284,\n author = {Kriege, Nils M. and Giscard, Pierre-Louis and Wilson, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. 
Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On Valid Optimal Assignment Kernels and Applications to Graph Classification},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0efe32849d230d7f53049ddc4a4b0c60-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0efe32849d230d7f53049ddc4a4b0c60-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0efe32849d230d7f53049ddc4a4b0c60-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0efe32849d230d7f53049ddc4a4b0c60-Reviews.html", "metareview": "", "pdf_size": 389545, "gs_citation": 276, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12569553722760029249&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "TU Dortmund, Germany; University of York, UK; University of York, UK", "aff_domain": "tu-dortmund.de;york.ac.uk;york.ac.uk", "email": "tu-dortmund.de;york.ac.uk;york.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0efe32849d230d7f53049ddc4a4b0c60-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Technische Universit\u00e4t Dortmund;University of York", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-dortmund.de;https://www.york.ac.uk", "aff_unique_abbr": "TU Dortmund;York", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United Kingdom" }, { "title": "On the Recursive Teaching Dimension of VC Classes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7332", "id": "7332", "author_site": "Peter Chen, Xi Chen, Yu Cheng, Bo Tang", "author": "Xi Chen; Xi Chen; Yu Cheng; Bo Tang", "abstract": "The recursive teaching dimension (RTD) of a concept class $C \\subseteq \\{0, 1\\}^n$, introduced by Zilles et al. [ZLHZ11], is a complexity parameter measured by the worst-case number of labeled examples needed to learn any target concept of $C$ in the recursive teaching model. In this paper, we study the quantitative relation between RTD and the well-known learning complexity measure VC dimension (VCD), and improve the best known upper and (worst-case) lower bounds on the recursive teaching dimension with respect to the VC dimension. Given a concept class $C \\subseteq \\{0, 1\\}^n$ with $VCD(C) = d$, we first show that $RTD(C)$ is at most $d 2^{d+1}$. This is the first upper bound for $RTD(C)$ that depends only on $VCD(C)$, independent of the size of the concept class $|C|$ and its~domain size $n$. Before our work, the best known upper bound for $RTD(C)$ is $O(d 2^d \\log \\log |C|)$, obtained by Moran et al. [MSWY15]. We remove the $\\log \\log |C|$ factor. We also improve the lower bound on the worst-case ratio of $RTD(C)$ to $VCD(C)$. We present a family of classes $\\{ C_k \\}_{k \\ge 1}$ with $VCD(C_k) = 3k$ and $RTD(C_k)=5k$, which implies that the ratio of $RTD(C)$ to $VCD(C)$ in the worst case can be as large as $5/3$. Before our work, the largest ratio known was $3/2$ as obtained by Kuhlmann [Kuh99]. Since then, no finite concept class $C$ has been known to satisfy $RTD(C) > (3/2) VCD(C)$.", "bibtex": "@inproceedings{NIPS2016_69a5b599,\n author = {Chen, Xi and Chen, Xi and Cheng, Yu and Tang, Bo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {On the Recursive Teaching Dimension of VC Classes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/69a5b5995110b36a9a347898d97a610e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/69a5b5995110b36a9a347898d97a610e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/69a5b5995110b36a9a347898d97a610e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/69a5b5995110b36a9a347898d97a610e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/69a5b5995110b36a9a347898d97a610e-Reviews.html", "metareview": "", "pdf_size": 267766, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1722390049510535816&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/69a5b5995110b36a9a347898d97a610e-Abstract.html" }, { "title": "One-vs-Each Approximation to Softmax for Scalable Estimation of Probabilities", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7070", "id": "7070", "author_site": "Michalis Titsias", "author": "Michalis Titsias RC AUEB", "abstract": "The softmax representation of probabilities for categorical variables plays a prominent role in modern machine learning with numerous applications in areas such as large scale classification, neural language modeling and recommendation systems. However, softmax estimation is very expensive for large scale inference because of the high cost associated with computing the normalizing constant. Here, we introduce an efficient approximation to softmax probabilities which takes the form of a rigorous lower bound on the exact probability. This bound is expressed as a product over pairwise probabilities and it leads to scalable estimation based on stochastic optimization. It allows us to perform doubly stochastic estimation by subsampling both training instances and class labels. We show that the new bound has interesting theoretical properties and we demonstrate its use in classification problems.", "bibtex": "@inproceedings{NIPS2016_814a9c18,\n author = {Titsias RC AUEB, Michalis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {One-vs-Each Approximation to Softmax for Scalable Estimation of Probabilities},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/814a9c18f5abff398787c9cfcbf3d80c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/814a9c18f5abff398787c9cfcbf3d80c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/814a9c18f5abff398787c9cfcbf3d80c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/814a9c18f5abff398787c9cfcbf3d80c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/814a9c18f5abff398787c9cfcbf3d80c-Reviews.html", "metareview": "", "pdf_size": 651380, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6557421534269289119&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Informatics, Athens University of Economics and Business", "aff_domain": "aueb.gr", "email": "aueb.gr", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/814a9c18f5abff398787c9cfcbf3d80c-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Athens University of Economics and Business", "aff_unique_dep": "Department of Informatics", "aff_unique_url": "https://www.aueb.gr", "aff_unique_abbr": "AUEB", "aff_campus_unique_index": "0", "aff_campus_unique": "Athens", "aff_country_unique_index": "0", "aff_country_unique": "Greece" }, { "title": "Online Bayesian Moment Matching for Topic Modeling with Unknown Number of Topics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7318", "id": "7318", "author_site": "Wei-Shou Hsu, Pascal Poupart", "author": "Wei-Shou Hsu; Pascal Poupart", "abstract": "Latent Dirichlet Allocation (LDA) is a very popular model for topic modeling as well as many other problems with latent groups. It is both simple and effective. When the number of topics (or latent groups) is unknown, the Hierarchical Dirichlet Process (HDP) provides an elegant non-parametric extension; however, it is a complex model and it is difficult to incorporate prior knowledge since the distribution over topics is implicit. We propose two new models that extend LDA in a simple and intuitive fashion by directly expressing a distribution over the number of topics. We also propose a new online Bayesian moment matching technique to learn the parameters and the number of topics of those models based on streaming data. The approach achieves higher log-likelihood than batch and online HDP with fixed hyperparameters on several corpora.", "bibtex": "@inproceedings{NIPS2016_0233f3bb,\n author = {Hsu, Wei-Shou and Poupart, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Bayesian Moment Matching for Topic Modeling with Unknown Number of Topics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0233f3bb964cf325a30f8b1c2ed2da93-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0233f3bb964cf325a30f8b1c2ed2da93-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0233f3bb964cf325a30f8b1c2ed2da93-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0233f3bb964cf325a30f8b1c2ed2da93-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0233f3bb964cf325a30f8b1c2ed2da93-Reviews.html", "metareview": "", "pdf_size": 297336, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11492076163612926244&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "David R. Cheriton School of Computer Science, University of Waterloo; David R. Cheriton School of Computer Science, University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "email": "uwaterloo.ca;uwaterloo.ca", "github": "https://github.com/whsu/bmm", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0233f3bb964cf325a30f8b1c2ed2da93-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "David R. Cheriton School of Computer Science", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UWaterloo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "title": "Online Convex Optimization with Unconstrained Domains and Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7304", "id": "7304", "author_site": "Ashok Cutkosky, Kwabena A Boahen", "author": "Ashok Cutkosky; Kwabena A. Boahen", "abstract": "We propose an online convex optimization algorithm (RescaledExp) that achieves optimal regret in the unconstrained setting without prior knowledge of any bounds on the loss functions. We prove a lower bound showing an exponential separation between the regret of existing algorithms that require a known bound on the loss functions and any algorithm that does not require such knowledge. RescaledExp matches this lower bound asymptotically in the number of iterations. RescaledExp is naturally hyperparameter-free and we demonstrate empirically that it matches prior optimization algorithms that require hyperparameter optimization.", "bibtex": "@inproceedings{NIPS2016_550a141f,\n author = {Cutkosky, Ashok and Boahen, Kwabena A},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Convex Optimization with Unconstrained Domains and Losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/550a141f12de6341fba65b0ad0433500-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/550a141f12de6341fba65b0ad0433500-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/550a141f12de6341fba65b0ad0433500-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/550a141f12de6341fba65b0ad0433500-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/550a141f12de6341fba65b0ad0433500-Reviews.html", "metareview": "", "pdf_size": 772650, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14655942992216046498&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Department of Computer Science, Stanford University; Department of Bioengineering, Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu", "email": "cs.stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/550a141f12de6341fba65b0ad0433500-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Online ICA: Understanding Global Dynamics of Nonconvex Optimization via Diffusion Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7149", "id": "7149", "author_site": "Chris Junchi Li, Zhaoran Wang, Han Liu", "author": "Chris Junchi Li; Zhaoran Wang; Han Liu", "abstract": "Solving statistical learning problems often involves nonconvex optimization. Despite the empirical success of nonconvex statistical optimization methods, their global dynamics, especially convergence to the desirable local minima, remain less well understood in theory. In this paper, we propose a new analytic paradigm based on diffusion processes to characterize the global dynamics of nonconvex statistical optimization. As a concrete example, we study stochastic gradient descent (SGD) for the tensor decomposition formulation of independent component analysis. In particular, we cast different phases of SGD into diffusion processes, i.e., solutions to stochastic differential equations. Initialized from an unstable equilibrium, the global dynamics of SGD transit over three consecutive phases: (i) an unstable Ornstein-Uhlenbeck process slowly departing from the initialization, (ii) the solution to an ordinary differential equation, which quickly evolves towards the desirable local minimum, and (iii) a stable Ornstein-Uhlenbeck process oscillating around the desirable local minimum. Our proof techniques are based upon Stroock and Varadhan\u2019s weak convergence of Markov chains to diffusion processes, which are of independent interest.", "bibtex": "@inproceedings{NIPS2016_6ef80bb2,\n author = {Li, Chris Junchi and Wang, Zhaoran and Liu, Han},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online ICA: Understanding Global Dynamics of Nonconvex Optimization via Diffusion Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6ef80bb237adf4b6f77d0700e1255907-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6ef80bb237adf4b6f77d0700e1255907-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6ef80bb237adf4b6f77d0700e1255907-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6ef80bb237adf4b6f77d0700e1255907-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6ef80bb237adf4b6f77d0700e1255907-Reviews.html", "metareview": "", "pdf_size": 1654106, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3654866305827051089&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Operations Research and Financial Engineering, Princeton University; Department of Operations Research and Financial Engineering, Princeton University; Department of Operations Research and Financial Engineering, Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu", "email": "princeton.edu;princeton.edu;princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6ef80bb237adf4b6f77d0700e1255907-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Department of Operations Research and Financial Engineering", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Online Pricing with Strategic and Patient Buyers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7237", "id": "7237", "author_site": "Michal Feldman, Tomer Koren, Roi Livni, Yishay Mansour, Aviv Zohar", "author": "Michal Feldman; Tomer Koren; Roi Livni; Yishay Mansour; Aviv Zohar", "abstract": "We consider a seller with an unlimited supply of a single good, who is faced with a stream of $T$ buyers. Each buyer has a window of time in which she would like to purchase, and would buy at the lowest price in that window, provided that this price is lower than her private value (and otherwise, would not buy at all). In this setting, we give an algorithm that attains $O(T^{2/3})$ regret over any sequence of $T$ buyers with respect to the best fixed price in hindsight, and prove that no algorithm can perform better in the worst case.", "bibtex": "@inproceedings{NIPS2016_44968aec,\n author = {Feldman, Michal and Koren, Tomer and Livni, Roi and Mansour, Yishay and Zohar, Aviv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online Pricing with Strategic and Patient Buyers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/44968aece94f667e4095002d140b5896-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/44968aece94f667e4095002d140b5896-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/44968aece94f667e4095002d140b5896-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/44968aece94f667e4095002d140b5896-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/44968aece94f667e4095002d140b5896-Reviews.html", "metareview": "", "pdf_size": 282239, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4817895553071006617&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Tel-Aviv University+MSR Herzliya; Google Brain; Princeton University; Tel-Aviv University; Hebrew University of Jerusalem", "aff_domain": "cs.tau.ac.il;google.com;cs.princeton.edu;tau.ac.il;cs.huji.ac.il", "email": "cs.tau.ac.il;google.com;cs.princeton.edu;tau.ac.il;cs.huji.ac.il", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/44968aece94f667e4095002d140b5896-Abstract.html", "aff_unique_index": "0+1;2;3;0;4", "aff_unique_norm": "Tel Aviv University;Microsoft;Google;Princeton University;Hebrew University of Jerusalem", "aff_unique_dep": ";Microsoft Research;Google Brain;;", "aff_unique_url": "https://www.tau.ac.il;https://www.microsoft.com/en-us/research/group/microsoft-research-herzliya;https://brain.google.com;https://www.princeton.edu;https://www.huji.ac.il", "aff_unique_abbr": "TAU;MSR;Google Brain;Princeton;HUJI", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Herzliya;Mountain View;Jerusalem", "aff_country_unique_index": "0+0;1;1;0;0", "aff_country_unique": "Israel;United States" }, { "title": "Online and Differentially-Private Tensor Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6985", "id": "6985", "author_site": "Yining Wang, Anima Anandkumar", "author": "Yining Wang; Anima Anandkumar", "abstract": "Tensor decomposition is positioned to be a pervasive tool in the era of big data. In this paper, we resolve many of the key algorithmic questions regarding robustness, memory efficiency, and differential privacy of tensor decomposition. We propose simple variants of the tensor power method which enjoy these strong properties. We propose the first streaming method with a linear memory requirement. Moreover, we present a noise calibrated tensor power method with efficient privacy guarantees. At the heart of all these guarantees lies a careful perturbation analysis derived in this paper which improves up on the existing results significantly.", "bibtex": "@inproceedings{NIPS2016_7eb7eabb,\n author = {Wang, Yining and Anandkumar, Anima},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Online and Differentially-Private Tensor Decomposition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7eb7eabbe9bd03c2fc99881d04da9cbd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7eb7eabbe9bd03c2fc99881d04da9cbd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7eb7eabbe9bd03c2fc99881d04da9cbd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7eb7eabbe9bd03c2fc99881d04da9cbd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7eb7eabbe9bd03c2fc99881d04da9cbd-Reviews.html", "metareview": "", "pdf_size": 364371, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12051013250832369063&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Machine Learning Department, Carnegie Mellon University; Department of EECS, University of California, Irvine", "aff_domain": "cs.cmu.edu;uci.edu", "email": "cs.cmu.edu;uci.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7eb7eabbe9bd03c2fc99881d04da9cbd-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Carnegie Mellon University;University of California, Irvine", "aff_unique_dep": "Machine Learning Department;Department of EECS", "aff_unique_url": "https://www.cmu.edu;https://www.uci.edu", "aff_unique_abbr": "CMU;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "e1392e9c2b", "title": "Only H is left: Near-tight Episodic PAC RL", "site": "https://papers.nips.cc/paper_files/paper/2016/hash/5d616dd38211ebb5d6ec52986674b6e4-Abstract.html", "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "author": "", "aff": "", "aff_domain": "", "email": "", "github": "", "project": "", "author_num": 1 }, { "title": "Operator Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7350", "id": "7350", "author_site": "Rajesh Ranganath, Dustin Tran, Jaan Altosaar, David Blei", "author": "Rajesh Ranganath; Dustin Tran; Jaan Altosaar; David Blei", "abstract": "Variational inference is an umbrella term for algorithms which cast Bayesian inference as optimization. Classically, variational inference uses the Kullback-Leibler divergence to define the optimization. Though this divergence has been widely used, the resultant posterior approximation can suffer from undesirable statistical properties. To address this, we reexamine variational inference from its roots as an optimization problem. We use operators, or functions of functions, to design variational objectives. As one example, we design a variational objective with a Langevin-Stein operator. We develop a black box algorithm, operator variational inference (OPVI), for optimizing any operator objective. Importantly, operators enable us to make explicit the statistical and computational tradeoffs for variational inference. We can characterize different properties of variational objectives, such as objectives that admit data subsampling---allowing inference to scale to massive data---as well as objectives that admit variational programs---a rich class of posterior approximations that does not require a tractable density. 
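
[Editor's note] The Langevin-Stein operator mentioned above maps a test function f to (O f)(z) = grad_z log p(z)^T f(z) + div f(z), whose expectation is zero when z is drawn from the true posterior; OPVI minimizes a supremum of such expectations over a family of test functions. A Monte Carlo sketch with one fixed test function (the optimization over f is omitted; all names here are mine):

import numpy as np

def langevin_stein_objective(z, grad_log_p, f, div_f):
    """Monte Carlo estimate of E_q[(O f)(z)] for the Langevin-Stein
    operator, using samples z from the variational distribution q."""
    vals = [grad_log_p(zi) @ f(zi) + div_f(zi) for zi in z]
    return float(np.mean(vals))

# Sanity check: if q equals the target p = N(0, I), the Stein identity
# makes the expectation zero for sufficiently regular f.
rng = np.random.default_rng(0)
z = rng.standard_normal((2000, 2))
est = langevin_stein_objective(
    z,
    grad_log_p=lambda x: -x,                  # score of N(0, I)
    f=np.sin,                                 # elementwise test function
    div_f=lambda x: np.cos(x).sum())          # its divergence
# est should be close to 0
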
We illustrate the benefits of OPVI on a mixture model and a generative model of images.", "bibtex": "@inproceedings{NIPS2016_d947bf06,\n author = {Ranganath, Rajesh and Tran, Dustin and Altosaar, Jaan and Blei, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Operator Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d947bf06a885db0d477d707121934ff8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d947bf06a885db0d477d707121934ff8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d947bf06a885db0d477d707121934ff8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d947bf06a885db0d477d707121934ff8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d947bf06a885db0d477d707121934ff8-Reviews.html", "metareview": "", "pdf_size": 265610, "gs_citation": 132, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14255868610291402619&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d947bf06a885db0d477d707121934ff8-Abstract.html" }, { "title": "Optimal Architectures in a Solvable Model of Deep Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7246", "id": "7246", "author_site": "Jonathan Kadmon, Haim Sompolinsky", "author": "Jonathan Kadmon; Haim Sompolinsky", "abstract": "Deep neural networks have received a considerable attention due to the success of their training for real world machine learning applications. They are also of great interest to the understanding of sensory processing in cortical sensory hierarchies. The purpose of this work is to advance our theoretical understanding of the computational benefits of these architectures. Using a simple model of clustered noisy inputs and a simple learning rule, we provide analytically derived recursion relations describing the propagation of the signals along the deep network. By analysis of these equations, and defining performance measures, we show that these model networks have optimal depths. We further explore the dependence of the optimal architecture on the system parameters.", "bibtex": "@inproceedings{NIPS2016_0fe47339,\n author = {Kadmon, Jonathan and Sompolinsky, Haim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Architectures in a Solvable Model of Deep Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0fe473396242072e84af286632d3f0ff-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0fe473396242072e84af286632d3f0ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0fe473396242072e84af286632d3f0ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0fe473396242072e84af286632d3f0ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0fe473396242072e84af286632d3f0ff-Reviews.html", "metareview": "", "pdf_size": 1477696, "gs_citation": 37, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3173941374738078757&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "The Racah Institute of Physics and ELSC, The Hebrew University, Israel; The Racah Institute of Physics and ELSC, The Hebrew University, Israel + Center for Brain Science, Harvard University", "aff_domain": "mail.huji.ac.il; ", "email": "mail.huji.ac.il; ", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0fe473396242072e84af286632d3f0ff-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "Hebrew University;Harvard University", "aff_unique_dep": "Racah Institute of Physics and ELSC;Center for Brain Science", "aff_unique_url": "http://www.huji.ac.il;https://www.harvard.edu", "aff_unique_abbr": "HUJI;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0+1", "aff_country_unique": "Israel;United States" }, { "title": "Optimal Binary Classifier Aggregation for General Losses", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7181", "id": "7181", "author_site": "Akshay Balsubramani, Yoav S Freund", "author": "Akshay Balsubramani; Yoav S Freund", "abstract": "We address the problem of aggregating an ensemble of predictors with known loss bounds in a semi-supervised binary classification setting, to minimize prediction loss incurred on the unlabeled data. We find the minimax optimal predictions for a very general class of loss functions including all convex and many non-convex losses, extending a recent analysis of the problem for misclassification error. The result is a family of semi-supervised ensemble aggregation algorithms which are as efficient as linear learning by convex optimization, but are minimax optimal without any relaxations. Their decision rules take a form familiar in decision theory -- applying sigmoid functions to a notion of ensemble margin -- without the assumptions typically made in margin-based learning.", "bibtex": "@inproceedings{NIPS2016_eaa52f33,\n author = {Balsubramani, Akshay and Freund, Yoav S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Binary Classifier Aggregation for General Losses},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/eaa52f3366768bca401dca9ea5b181dd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/eaa52f3366768bca401dca9ea5b181dd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/eaa52f3366768bca401dca9ea5b181dd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/eaa52f3366768bca401dca9ea5b181dd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/eaa52f3366768bca401dca9ea5b181dd-Reviews.html", "metareview": "", "pdf_size": 339232, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1786565276649562169&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of California, San Diego; University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "email": "ucsd.edu;ucsd.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/eaa52f3366768bca401dca9ea5b181dd-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Black-Box Reductions Between Optimization Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7316", "id": "7316", "author_site": "Zeyuan Allen-Zhu, Elad Hazan", "author": "Zeyuan Allen-Zhu; Elad Hazan", "abstract": "The diverse world of machine learning applications has given rise to a plethora of algorithms and optimization methods, finely tuned to the specific regression or classification task at hand. We reduce the complexity of algorithm design for machine learning by reductions: we develop reductions that take a method developed for one setting and apply it to the entire spectrum of smoothness and strong-convexity in applications. Furthermore, unlike existing results, our new reductions are OPTIMAL and more PRACTICAL. We show how these new reductions give rise to new and faster running times on training linear classifiers for various families of loss functions, and conclude with experiments showing their successes also in practice.", "bibtex": "@inproceedings{NIPS2016_1f50893f,\n author = {Allen-Zhu, Zeyuan and Hazan, Elad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Black-Box Reductions Between Optimization Objectives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1f50893f80d6830d62765ffad7721742-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1f50893f80d6830d62765ffad7721742-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1f50893f80d6830d62765ffad7721742-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1f50893f80d6830d62765ffad7721742-Reviews.html", "metareview": "", "pdf_size": 1077436, "gs_citation": 112, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4491025992728244511&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Institute for Advanced Study + Princeton University; Princeton University", "aff_domain": "csail.mit.edu;cs.princeton.edu", "email": "csail.mit.edu;cs.princeton.edu", "github": "", "project": "https://arxiv.org/abs/1603.05642", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1f50893f80d6830d62765ffad7721742-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Institute for Advanced Study;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://ias.edu;https://www.princeton.edu", "aff_unique_abbr": "IAS;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0", "aff_country_unique": "United States" }, { "title": "Optimal Cluster Recovery in the Labeled Stochastic Block Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7028", "id": "7028", "author_site": "Se-Young Yun, Alexandre Proutiere", "author": "Se-Young Yun; Alexandre Proutiere", "abstract": "We consider the problem of community detection or clustering in the labeled Stochastic Block Model (LSBM) with a finite number $K$ of clusters of sizes linearly growing with the global population of items $n$. Every pair of items is labeled independently at random, and label $\\ell$ appears with probability $p(i,j,\\ell)$ between two items in clusters indexed by $i$ and $j$, respectively. The objective is to reconstruct the clusters from the observation of these random labels. Clustering under the SBM and its extensions has attracted much attention recently. Most existing work aimed at characterizing the set of parameters such that it is possible to infer clusters either positively correlated with the true clusters, or with a vanishing proportion of misclassified items, or exactly matching the true clusters. We find the set of parameters such that there exists a clustering algorithm with at most $s$ misclassified items on average under the general LSBM and for any $s=o(n)$, which solves one open problem raised in \\cite{abbe2015community}. We further develop an algorithm, based on simple spectral methods, that achieves this fundamental performance limit within $O(n \\mbox{polylog}(n))$ computations and without a priori knowledge of the model parameters.", "bibtex": "@inproceedings{NIPS2016_a8849b05,\n author = {Yun, Se-Young and Proutiere, Alexandre},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Cluster Recovery in the Labeled Stochastic Block Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a8849b052492b5106526b2331e526138-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a8849b052492b5106526b2331e526138-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a8849b052492b5106526b2331e526138-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a8849b052492b5106526b2331e526138-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a8849b052492b5106526b2331e526138-Reviews.html", "metareview": "", "pdf_size": 282778, "gs_citation": 107, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=171728765122519467&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "CNLS, Los Alamos National Lab.; Automatic Control Dept., KTH", "aff_domain": "lanl.gov;kth.se", "email": "lanl.gov;kth.se", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a8849b052492b5106526b2331e526138-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Los Alamos National Laboratory;KTH Royal Institute of Technology", "aff_unique_dep": "Center for Nonlinear Studies;Department of Automatic Control", "aff_unique_url": "https://www.lanl.gov;https://www.kth.se", "aff_unique_abbr": "LANL;KTH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Sweden" }, { "title": "Optimal Learning for Multi-pass Stochastic Gradient Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7068", "id": "7068", "author_site": "Junhong Lin, Lorenzo Rosasco", "author": "Junhong Lin; Lorenzo Rosasco", "abstract": "We analyze the learning properties of the stochastic gradient method when multiple passes over the data and mini-batches are allowed. In particular, we consider the square loss and show that for a universal step-size choice, the number of passes acts as a regularization parameter, and optimal finite sample bounds can be achieved by early-stopping. Moreover, we show that larger step-sizes are allowed when considering mini-batches. Our analysis is based on a unifying approach, encompassing both batch and stochastic gradient methods as special cases.", "bibtex": "@inproceedings{NIPS2016_fe40fb94,\n author = {Lin, Junhong and Rosasco, Lorenzo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Learning for Multi-pass Stochastic Gradient Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fe40fb944ee700392ed51bfe84dd4e3d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fe40fb944ee700392ed51bfe84dd4e3d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fe40fb944ee700392ed51bfe84dd4e3d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fe40fb944ee700392ed51bfe84dd4e3d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fe40fb944ee700392ed51bfe84dd4e3d-Reviews.html", "metareview": "", "pdf_size": 430340, "gs_citation": 44, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8289201576966089286&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "LCSL, IIT-MIT, USA; DIBRIS, Univ. Genova, ITALY + LCSL, IIT-MIT, USA", "aff_domain": "iit.it;mit.edu", "email": "iit.it;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fe40fb944ee700392ed51bfe84dd4e3d-Abstract.html", "aff_unique_index": "0;1+0", "aff_unique_norm": "Istituto Italiano di Tecnologia;University of Genoa", "aff_unique_dep": "LCSL;DIBRIS (Department of Informatics, Bioengineering, Robotics and Systems Engineering)", "aff_unique_url": "https://www.iit.it;https://www.unige.it", "aff_unique_abbr": "IIT;Univ. Genova", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1+0", "aff_country_unique": "United States;Italy" }, { "title": "Optimal Sparse Linear Encoders and Sparse PCA", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7164", "id": "7164", "author_site": "Malik Magdon-Ismail, Christos Boutsidis", "author": "Malik Magdon-Ismail; Christos Boutsidis", "abstract": "Principal components analysis~(PCA) is the optimal linear encoder of data. Sparse linear encoders (e.g., sparse PCA) produce more interpretable features that can promote better generalization. (i) Given a level of sparsity, what is the best approximation to PCA? (ii) Are there efficient algorithms which can achieve this optimal combinatorial tradeoff? We answer both questions by providing the first polynomial-time algorithms to construct \\emph{optimal} sparse linear auto-encoders; additionally, we demonstrate the performance of our algorithms on real data.", "bibtex": "@inproceedings{NIPS2016_0e65972d,\n author = {Magdon-Ismail, Malik and Boutsidis, Christos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Sparse Linear Encoders and Sparse PCA},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0e65972dce68dad4d52d063967f0a705-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0e65972dce68dad4d52d063967f0a705-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0e65972dce68dad4d52d063967f0a705-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0e65972dce68dad4d52d063967f0a705-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0e65972dce68dad4d52d063967f0a705-Reviews.html", "metareview": "", "pdf_size": 194442, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8104107885771891742&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Rensselaer Polytechnic Institute, Troy, NY 12211; New York, NY", "aff_domain": "cs.rpi.edu;gmail.com", "email": "cs.rpi.edu;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0e65972dce68dad4d52d063967f0a705-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Rensselaer Polytechnic Institute;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rpi.edu;https://www.nyu.edu", "aff_unique_abbr": "RPI;NYU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Troy;New York", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimal Tagging with Markov Chain Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7197", "id": "7197", "author_site": "Nir Rosenfeld, Amir Globerson", "author": "Nir Rosenfeld; Amir Globerson", "abstract": "Many information systems use tags and keywords to describe and annotate content. These allow for efficient organization and categorization of items, as well as facilitate relevant search queries. As such, the selected set of tags for an item can have a considerable effect on the volume of traffic that eventually reaches an item. In tagging systems where tags are exclusively chosen by an item's owner, who in turn is interested in maximizing traffic, a principled approach for assigning tags can prove valuable. In this paper we introduce the problem of optimal tagging, where the task is to choose a subset of tags for a new item such that the probability of browsing users reaching that item is maximized. We formulate the problem by modeling traffic using a Markov chain, and asking how transitions in this chain should be modified to maximize traffic into a certain state of interest. The resulting optimization problem involves maximizing a certain function over subsets, under a cardinality constraint. We show that the optimization problem is NP-hard, but has a (1-1/e)-approximation via a simple greedy algorithm due to monotonicity and submodularity. Furthermore, the structure of the problem allows for an efficient computation of the greedy step. To demonstrate the effectiveness of our method, we perform experiments on three tagging datasets, and show that the greedy algorithm outperforms other baselines.", "bibtex": "@inproceedings{NIPS2016_c8ed21db,\n author = {Rosenfeld, Nir and Globerson, Amir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal Tagging with Markov Chain Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c8ed21db4f678f3b13b9d5ee16489088-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c8ed21db4f678f3b13b9d5ee16489088-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c8ed21db4f678f3b13b9d5ee16489088-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c8ed21db4f678f3b13b9d5ee16489088-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c8ed21db4f678f3b13b9d5ee16489088-Reviews.html", "metareview": "", "pdf_size": 286243, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5147313569809833791&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Computer Science and Engineering, Hebrew University of Jerusalem; The Blavatnik School of Computer Science, Tel Aviv University", "aff_domain": "mail.huji.ac.il;post.tau.ac.il", "email": "mail.huji.ac.il;post.tau.ac.il", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c8ed21db4f678f3b13b9d5ee16489088-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Hebrew University of Jerusalem;Tel Aviv University", "aff_unique_dep": "School of Computer Science and Engineering;Blavatnik School of Computer Science", "aff_unique_url": "http://www.huji.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "HUJI;TAU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Jerusalem;Tel Aviv", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "title": "Optimal spectral transportation with application to music transcription", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7108", "id": "7108", "author_site": "R\u00e9mi Flamary, C\u00e9dric F\u00e9votte, Nicolas Courty, Valentin Emiya", "author": "R\u00e9mi Flamary; C\u00e9dric F\u00e9votte; Nicolas Courty; Valentin Emiya", "abstract": "Many spectral unmixing methods rely on the non-negative decomposition of spectral data onto a dictionary of spectral templates. In particular, state-of-the-art music transcription systems decompose the spectrogram of the input signal onto a dictionary of representative note spectra. The typical measures of fit used to quantify the adequacy of the decomposition compare the data and template entries frequency-wise. As such, small displacements of energy from one frequency bin to another, as well as variations of timbre, can disproportionally harm the fit. We address these issues by means of optimal transportation and propose a new measure of fit that treats the frequency distributions of energy holistically as opposed to frequency-wise. Building on the harmonic nature of sound, the new measure is invariant to shifts of energy to harmonically-related frequencies, as well as to small and local displacements of energy. Equipped with this new measure of fit, the dictionary of note templates can be considerably simplified to a set of Dirac vectors located at the target fundamental frequencies (musical pitch values). 
This in turn gives rise to a very fast and simple decomposition algorithm that achieves state-of-the-art performance on real musical data.", "bibtex": "@inproceedings{NIPS2016_352fe25d,\n author = {Flamary, R\\'{e}mi and F\\'{e}votte, C\\'{e}dric and Courty, Nicolas and Emiya, Valentin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimal spectral transportation with application to music transcription},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/352fe25daf686bdb4edca223c921acea-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/352fe25daf686bdb4edca223c921acea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/352fe25daf686bdb4edca223c921acea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/352fe25daf686bdb4edca223c921acea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/352fe25daf686bdb4edca223c921acea-Reviews.html", "metareview": "", "pdf_size": 915367, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16043982180687380138&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 18, "aff": "Universit\u00e9 C\u00f4te d\u2019Azur, CNRS, OCA; CNRS, IRIT, Toulouse; Universit\u00e9 de Bretagne Sud, CNRS, IRISA; Aix-Marseille Universit\u00e9, CNRS, LIF", "aff_domain": "unice.fr;irit.fr;univ-ubs.fr;lif.univ-mrs.fr", "email": "unice.fr;irit.fr;univ-ubs.fr;lif.univ-mrs.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/352fe25daf686bdb4edca223c921acea-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 C\u00f4te d\u2019Azur;CNRS;Universit\u00e9 de Bretagne Sud;Aix-Marseille Universit\u00e9", "aff_unique_dep": ";IRIT;;CNRS, LIF", "aff_unique_url": "https://www.univ-cotedazur.fr;https://www.cnrs.fr;https://www.univ-ubs.fr;https://www.univ-amu.fr", "aff_unique_abbr": "UCA;CNRS;UBS;AMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "title": "Optimistic Bandit Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6953", "id": "6953", "author_site": "Scott Yang, Mehryar Mohri", "author": "Scott Yang; Mehryar Mohri", "abstract": "We introduce the general and powerful scheme of predicting information re-use in optimization algorithms. This allows us to devise a computationally efficient algorithm for bandit convex optimization with new state-of-the-art guarantees for both Lipschitz loss functions and loss functions with Lipschitz gradients. This is the first algorithm admitting both a polynomial time complexity and a regret that is polynomial in the dimension of the action space that improves upon the original regret bound for Lipschitz loss functions, achieving a regret of $\\widetilde O(T^{11/16}d^{3/8})$. Our algorithm further improves upon the best existing polynomial-in-dimension bound (both computationally and in terms of regret) for loss functions with Lipschitz gradients, achieving a regret of $\\widetilde O(T^{8/13} d^{5/3})$.", "bibtex": "@inproceedings{NIPS2016_b20bb95a,\n author = {Yang, Scott and Mohri, Mehryar},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. 
Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimistic Bandit Convex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b20bb95ab626d93fd976af958fbc61ba-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b20bb95ab626d93fd976af958fbc61ba-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b20bb95ab626d93fd976af958fbc61ba-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b20bb95ab626d93fd976af958fbc61ba-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b20bb95ab626d93fd976af958fbc61ba-Reviews.html", "metareview": "", "pdf_size": 354383, "gs_citation": 22, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15666533893502700183&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "Courant Institute and Google; Courant Institute", "aff_domain": "cims.nyu.edu;cims.nyu.edu", "email": "cims.nyu.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b20bb95ab626d93fd976af958fbc61ba-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Courant Institute of Mathematical Sciences", "aff_unique_dep": "Mathematical Sciences", "aff_unique_url": "https://courant.nyu.edu", "aff_unique_abbr": "Courant", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimistic Gittins Indices", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7190", "id": "7190", "author_site": "Eli Gutin, Vivek Farias", "author": "Eli Gutin; Vivek Farias", "abstract": "Starting with the Thompson sampling algorithm, recent years have seen a resurgence of interest in Bayesian algorithms for the Multi-armed Bandit (MAB) problem. These algorithms seek to exploit prior information on arm biases, and while several have been shown to be regret optimal, their design has not emerged from a principled approach. In contrast, if one cared about Bayesian regret discounted over an infinite horizon at a fixed, pre-specified rate, the celebrated Gittins index theorem offers an optimal algorithm. Unfortunately, the Gittins analysis does not appear to carry over to minimizing Bayesian regret over all sufficiently large horizons, and computing a Gittins index is onerous relative to essentially any incumbent index scheme for the Bayesian MAB problem. The present paper proposes a sequence of 'optimistic' approximations to the Gittins index. We show that the use of these approximations in concert with the use of an increasing discount factor appears to offer a compelling alternative to a variety of index schemes proposed for the Bayesian MAB problem in recent years. In addition, we show that the simplest of these approximations yields regret that matches the Lai-Robbins lower bound, including achieving matching constants.", "bibtex": "@inproceedings{NIPS2016_452bf208,\n author = {Gutin, Eli and Farias, Vivek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimistic Gittins Indices},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/452bf208bf901322968557227b8f6efe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/452bf208bf901322968557227b8f6efe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/452bf208bf901322968557227b8f6efe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/452bf208bf901322968557227b8f6efe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/452bf208bf901322968557227b8f6efe-Reviews.html", "metareview": "", "pdf_size": 1855083, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1188217331358336675&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Operations Research Center, MIT; MIT Sloan School of Management", "aff_domain": "mit.edu;mit.edu", "email": "mit.edu;mit.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/452bf208bf901322968557227b8f6efe-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Operations Research Center", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Optimizing affinity-based binary hashing using auxiliary coordinates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7177", "id": "7177", "author_site": "Ramin Raziperchikolaei, Miguel A. Carreira-Perpinan", "author": "Ramin Raziperchikolaei; Miguel A. Carreira-Perpinan", "abstract": "In supervised binary hashing, one wants to learn a function that maps a high-dimensional feature vector to a vector of binary codes, for application to fast image retrieval. This typically results in a difficult optimization problem, nonconvex and nonsmooth, because of the discrete variables involved. Much work has simply relaxed the problem during training, solving a continuous optimization, and truncating the codes a posteriori. This gives reasonable results but is quite suboptimal. Recent work has tried to optimize the objective directly over the binary codes and achieved better results, but the hash function was still learned a posteriori, which remains suboptimal. We propose a general framework for learning hash functions using affinity-based loss functions that uses auxiliary coordinates. This closes the loop and optimizes jointly over the hash functions and the binary codes so that they gradually match each other. The resulting algorithm can be seen as an iterated version of the procedure of optimizing first over the codes and then learning the hash function. Compared to this, our optimization is guaranteed to obtain better hash functions while being not much slower, as demonstrated experimentally in various supervised datasets. In addition, our framework facilitates the design of optimization algorithms for arbitrary types of loss and hash functions.", "bibtex": "@inproceedings{NIPS2016_c5ff2543,\n author = {Raziperchikolaei, Ramin and Carreira-Perpinan, Miguel A.},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Optimizing affinity-based binary hashing using auxiliary coordinates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c5ff2543b53f4cc0ad3819a36752467b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c5ff2543b53f4cc0ad3819a36752467b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c5ff2543b53f4cc0ad3819a36752467b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c5ff2543b53f4cc0ad3819a36752467b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c5ff2543b53f4cc0ad3819a36752467b-Reviews.html", "metareview": "", "pdf_size": 201460, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14499946299968387044&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "EECS, University of California, Merced; EECS, University of California, Merced", "aff_domain": "ucmerced.edu;ucmerced.edu", "email": "ucmerced.edu;ucmerced.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c5ff2543b53f4cc0ad3819a36752467b-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Merced", "aff_unique_dep": "EECS", "aff_unique_url": "https://www.ucmerced.edu", "aff_unique_abbr": "UC Merced", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Merced", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Orthogonal Random Features", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7415", "id": "7415", "author_site": "Felix Xinnan Yu, Ananda Theertha Suresh, Krzysztof M Choromanski, Daniel Holtmann-Rice, Sanjiv Kumar", "author": "Felix Xinnan X Yu; Ananda Theertha Suresh; Krzysztof M Choromanski; Daniel N Holtmann-Rice; Sanjiv Kumar", "abstract": "We present an intriguing discovery related to Random Fourier Features: replacing multiplication by a random Gaussian matrix with multiplication by a properly scaled random orthogonal matrix significantly decreases kernel approximation error. We call this technique Orthogonal Random Features (ORF), and provide theoretical and empirical justification for its effectiveness. Motivated by the discovery, we further propose Structured Orthogonal Random Features (SORF), which uses a class of structured discrete orthogonal matrices to speed up the computation. The method reduces the time cost from $\\mathcal{O}(d^2)$ to $\\mathcal{O}(d \\log d)$, where $d$ is the data dimensionality, with almost no compromise in kernel approximation quality compared to ORF. Experiments on several datasets verify the effectiveness of ORF and SORF over the existing methods. We also provide discussions on using the same type of discrete orthogonal structure for a broader range of kernels and applications.", "bibtex": "@inproceedings{NIPS2016_53adaf49,\n author = {Yu, Felix Xinnan X and Suresh, Ananda Theertha and Choromanski, Krzysztof M and Holtmann-Rice, Daniel N and Kumar, Sanjiv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Orthogonal Random Features},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/53adaf494dc89ef7196d73636eb2451b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/53adaf494dc89ef7196d73636eb2451b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/53adaf494dc89ef7196d73636eb2451b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/53adaf494dc89ef7196d73636eb2451b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/53adaf494dc89ef7196d73636eb2451b-Reviews.html", "metareview": "", "pdf_size": 725248, "gs_citation": 266, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2255404596905507668&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Google Research, New York; Google Research, New York; Google Research, New York; Google Research, New York; Google Research, New York", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/53adaf494dc89ef7196d73636eb2451b-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "PAC Reinforcement Learning with Rich Observations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6918", "id": "6918", "author_site": "Akshay Krishnamurthy, Alekh Agarwal, John Langford", "author": "Akshay Krishnamurthy; Alekh Agarwal; John Langford", "abstract": "We propose and study a new model for reinforcement learning with rich observations, generalizing contextual bandits to sequential decision making. These models require an agent to take actions based on observations (features) with the goal of achieving long-term performance competitive with a large set of policies. To avoid barriers to sample-efficient learning associated with large observation spaces and general POMDPs, we focus on problems that can be summarized by a small number of hidden states and have long-term rewards that are predictable by a reactive function class. In this setting, we design and analyze a new reinforcement learning algorithm, Least Squares Value Elimination by Exploration. We prove that the algorithm learns near optimal behavior after a number of episodes that is polynomial in all relevant parameters, logarithmic in the number of policies, and independent of the size of the observation space. Our result provides theoretical justification for reinforcement learning with function approximation.", "bibtex": "@inproceedings{NIPS2016_2387337b,\n author = {Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PAC Reinforcement Learning with Rich Observations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2387337ba1e0b0249ba90f55b2ba2521-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2387337ba1e0b0249ba90f55b2ba2521-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2387337ba1e0b0249ba90f55b2ba2521-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2387337ba1e0b0249ba90f55b2ba2521-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2387337ba1e0b0249ba90f55b2ba2521-Reviews.html", "metareview": "", "pdf_size": 424710, "gs_citation": 202, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11079759271586060484&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Massachusetts, Amherst; Microsoft Research; Microsoft Research", "aff_domain": "cs.umass.edu;microsoft.com;microsoft.com", "email": "cs.umass.edu;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2387337ba1e0b0249ba90f55b2ba2521-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.umass.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UMass Amherst;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "PAC-Bayesian Theory Meets Bayesian Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6889", "id": "6889", "author_site": "Pascal Germain, Francis Bach, Alexandre Lacoste, Simon Lacoste-Julien", "author": "Pascal Germain; Francis Bach; Alexandre Lacoste; Simon Lacoste-Julien", "abstract": "We exhibit a strong link between frequentist PAC-Bayesian bounds and the Bayesian marginal likelihood. That is, for the negative log-likelihood loss function, we show that the minimization of PAC-Bayesian generalization bounds maximizes the Bayesian marginal likelihood. This provides an alternative explanation to the Bayesian Occam's razor criteria, under the assumption that the data is generated by an i.i.d. distribution. Moreover, as the negative log-likelihood is an unbounded loss function, we motivate and propose a PAC-Bayesian theorem tailored for the sub-gamma loss family, and we show that our approach is sound on classical Bayesian linear regression tasks.", "bibtex": "@inproceedings{NIPS2016_84d2004b,\n author = {Germain, Pascal and Bach, Francis and Lacoste, Alexandre and Lacoste-Julien, Simon},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PAC-Bayesian Theory Meets Bayesian Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/84d2004bf28a2095230e8e14993d398d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/84d2004bf28a2095230e8e14993d398d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/84d2004bf28a2095230e8e14993d398d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/84d2004bf28a2095230e8e14993d398d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/84d2004bf28a2095230e8e14993d398d-Reviews.html", "metareview": "", "pdf_size": 1253210, "gs_citation": 228, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16391430069914305742&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "INRIA Paris - \u00c9cole Normale Sup\u00e9rieure; INRIA Paris - \u00c9cole Normale Sup\u00e9rieure; Google; INRIA Paris - \u00c9cole Normale Sup\u00e9rieure", "aff_domain": "inria.fr;inria.fr;google.com;inria.fr", "email": "inria.fr;inria.fr;google.com;inria.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/84d2004bf28a2095230e8e14993d398d-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "INRIA Paris;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.inria.fr;https://www.google.com", "aff_unique_abbr": "INRIA;Google", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Paris;Mountain View", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "France;United States" }, { "title": "Pairwise Choice Markov Chains", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7044", "id": "7044", "author_site": "Stephen Ragain, Johan Ugander", "author": "Stephen Ragain; Johan Ugander", "abstract": "As datasets capturing human choices grow in richness and scale, particularly in online domains, there is an increasing need for choice models flexible enough to handle data that violate traditional choice-theoretic axioms such as regularity, stochastic transitivity, or Luce's choice axiom. In this work we introduce the Pairwise Choice Markov Chain (PCMC) model of discrete choice, an inferentially tractable model that does not assume these traditional axioms while still satisfying the foundational axiom of uniform expansion, which can be viewed as a weaker version of Luce's axiom. We show that the PCMC model significantly outperforms the Multinomial Logit (MNL) model in prediction tasks on two empirical data sets known to exhibit violations of Luce's axiom. Our analysis also synthesizes several recent observations connecting the Multinomial Logit model and Markov chains; the PCMC model retains the Multinomial Logit model as a special case.", "bibtex": "@inproceedings{NIPS2016_bcb41ccd,\n author = {Ragain, Stephen and Ugander, Johan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Pairwise Choice Markov Chains},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bcb41ccdc4363c6848a1d760f26c28a0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bcb41ccdc4363c6848a1d760f26c28a0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bcb41ccdc4363c6848a1d760f26c28a0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bcb41ccdc4363c6848a1d760f26c28a0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bcb41ccdc4363c6848a1d760f26c28a0-Reviews.html", "metareview": "", "pdf_size": 284671, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11435811666081297453&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Management Science & Engineering, Stanford University; Management Science & Engineering, Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bcb41ccdc4363c6848a1d760f26c28a0-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Management Science & Engineering", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Parameter Learning for Log-supermodular Distributions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7295", "id": "7295", "author_site": "Tatiana Shpakova, Francis Bach", "author": "Tatiana Shpakova; Francis Bach", "abstract": "We consider log-supermodular models on binary variables, which are probabilistic models with negative log-densities which are submodular. These models provide probabilistic interpretations of common combinatorial optimization tasks such as image segmentation. In this paper, we focus primarily on parameter estimation in the models from known upper-bounds on the intractable log-partition function. We show that the bound based on separable optimization on the base polytope of the submodular function is always inferior to a bound based on ``perturb-and-MAP'' ideas. Then, to learn parameters, given that our approximation of the log-partition function is an expectation (over our own randomization), we use a stochastic subgradient technique to maximize a lower-bound on the log-likelihood. This can also be extended to conditional maximum likelihood. We illustrate our new results in a set of experiments in binary image denoising, where we highlight the flexibility of a probabilistic model to learn with missing data.", "bibtex": "@inproceedings{NIPS2016_e9fd7c2c,\n author = {Shpakova, Tatiana and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Parameter Learning for Log-supermodular Distributions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e9fd7c2c6623306db59b6aef5c0d5cac-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e9fd7c2c6623306db59b6aef5c0d5cac-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e9fd7c2c6623306db59b6aef5c0d5cac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e9fd7c2c6623306db59b6aef5c0d5cac-Reviews.html", "metareview": "", "pdf_size": 326586, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15099004960134403363&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "INRIA - \u00c9cole Normale Sup\u00e9rieure Paris; INRIA - \u00c9cole Normale Sup\u00e9rieure Paris", "aff_domain": "inria.fr;inria.fr", "email": "inria.fr;inria.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e9fd7c2c6623306db59b6aef5c0d5cac-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Path-Normalized Optimization of Recurrent Neural Networks with ReLU Activations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7075", "id": "7075", "author_site": "Behnam Neyshabur, Yuhuai Wu, Russ Salakhutdinov, Nati Srebro", "author": "Behnam Neyshabur; Yuhuai Wu; Ruslan Salakhutdinov; Nati Srebro", "abstract": "We investigate the parameter-space geometry of recurrent neural networks (RNNs), and develop an adaptation of the path-SGD optimization method, attuned to this geometry, that can learn plain RNNs with ReLU activations. On several datasets that require capturing long-term dependency structure, we show that path-SGD can significantly improve trainability of ReLU RNNs compared to RNNs trained with SGD, even with various recently suggested initialization schemes.", "bibtex": "@inproceedings{NIPS2016_74563ba2,\n author = {Neyshabur, Behnam and Wu, Yuhuai and Salakhutdinov, Russ R and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Path-Normalized Optimization of Recurrent Neural Networks with ReLU Activations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/74563ba21a90da13dacf2a73e3ddefa7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/74563ba21a90da13dacf2a73e3ddefa7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/74563ba21a90da13dacf2a73e3ddefa7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/74563ba21a90da13dacf2a73e3ddefa7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/74563ba21a90da13dacf2a73e3ddefa7-Reviews.html", "metareview": "", "pdf_size": 833394, "gs_citation": 38, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9776142670751850226&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Toyota Technological Institute at Chicago; University of Toronto; Carnegie Mellon University; Toyota Technological Institute at Chicago", "aff_domain": "ttic.edu;cs.toronto.edu;cs.cmu.edu;ttic.edu", "email": "ttic.edu;cs.toronto.edu;cs.cmu.edu;ttic.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/74563ba21a90da13dacf2a73e3ddefa7-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Toyota Technological Institute at Chicago;University of Toronto;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tti-chicago.org;https://www.utoronto.ca;https://www.cmu.edu", "aff_unique_abbr": "TTI Chicago;U of T;CMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "title": "PerforatedCNNs: Acceleration through Elimination of Redundant Convolutions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7270", "id": "7270", "author_site": "Mikhail Figurnov, Aizhan Ibraimova, Dmitry Vetrov, Pushmeet Kohli", "author": "Mikhail Figurnov; Aizhan Ibraimova; Dmitry P Vetrov; Pushmeet Kohli", "abstract": "We propose a novel approach to reduce the computational cost of evaluation of convolutional neural networks, a factor that has hindered their deployment in low-power devices such as mobile phones. Inspired by the loop perforation technique from source code optimization, we speed up the bottleneck convolutional layers by skipping their evaluation in some of the spatial positions. We propose and analyze several strategies of choosing these positions. We demonstrate that perforation can accelerate modern convolutional networks such as AlexNet and VGG-16 by a factor of 2x - 4x. Additionally, we show that perforation is complementary to the recently proposed acceleration method of Zhang et al.", "bibtex": "@inproceedings{NIPS2016_f0e52b27,\n author = {Figurnov, Mikhail and Ibraimova, Aizhan and Vetrov, Dmitry P and Kohli, Pushmeet},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {PerforatedCNNs: Acceleration through Elimination of Redundant Convolutions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f0e52b27a7a5d6a1a87373dffa53dbe5-Reviews.html", "metareview": "", "pdf_size": 553908, "gs_citation": 198, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=235744167207877109&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "National Research University Higher School of Economics+Lomonosov Moscow State University; Skolkovo Institute of Science and Technology; National Research University Higher School of Economics+Yandex; Microsoft Research", "aff_domain": "figurnov.ru;gmail.com;yandex.ru;microsoft.com", "email": "figurnov.ru;gmail.com;yandex.ru;microsoft.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f0e52b27a7a5d6a1a87373dffa53dbe5-Abstract.html", "aff_unique_index": "0+1;2;0+3;4", "aff_unique_norm": "National Research University Higher School of Economics;Lomonosov Moscow State University;Skolkovo Institute of Science and Technology;Yandex;Microsoft", "aff_unique_dep": ";;;;Microsoft Research", "aff_unique_url": "https://hse.ru;https://www.msu.ru;https://www.skoltech.ru;https://yandex.com;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "HSE;MSU;Skoltech;Yandex;MSR", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Moscow", "aff_country_unique_index": "0+0;0;0+0;1", "aff_country_unique": "Russian Federation;United States" }, { "title": "Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7241", "id": "7241", "author_site": "Xinchen Yan, Jimei Yang, Ersin Yumer, Yijie Guo, Honglak Lee", "author": "Xinchen Yan; Jimei Yang; Ersin Yumer; Yijie Guo; Honglak Lee", "abstract": "Understanding the 3D world is a fundamental problem in computer vision. However, learning a good representation of 3D objects is still an open problem due to the high dimensionality of the data and many factors of variation involved. In this work, we investigate the task of single-view 3D object reconstruction from a learning agent's perspective. We formulate the learning process as an interaction between 3D and 2D representations and propose an encoder-decoder network with a novel projection loss defined by the projective transformation. More importantly, the projection loss enables the unsupervised learning using 2D observation without explicit 3D supervision. We demonstrate the ability of the model in generating 3D volume from a single 2D image with three sets of experiments: (1) learning from single-class objects; (2) learning from multi-class objects and (3) testing on novel object classes. 
Results show superior performance and better generalization ability for 3D object reconstruction when the projection loss is involved.", "bibtex": "@inproceedings{NIPS2016_e820a45f,\n author = {Yan, Xinchen and Yang, Jimei and Yumer, Ersin and Guo, Yijie and Lee, Honglak},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e820a45f1dfc7b95282d10b6087e11c0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e820a45f1dfc7b95282d10b6087e11c0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e820a45f1dfc7b95282d10b6087e11c0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e820a45f1dfc7b95282d10b6087e11c0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e820a45f1dfc7b95282d10b6087e11c0-Reviews.html", "metareview": "", "pdf_size": 15387396, "gs_citation": 683, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13986075700848568161&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "University of Michigan, Ann Arbor; Adobe Research; Adobe Research; University of Michigan, Ann Arbor; University of Michigan, Ann Arbor + Google Brain", "aff_domain": "umich.edu;adobe.com;adobe.com;umich.edu;umich.edu", "email": "umich.edu;adobe.com;adobe.com;umich.edu;umich.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e820a45f1dfc7b95282d10b6087e11c0-Abstract.html", "aff_unique_index": "0;1;1;0;0+2", "aff_unique_norm": "University of Michigan;Adobe;Google", "aff_unique_dep": ";Adobe Research;Google Brain", "aff_unique_url": "https://www.umich.edu;https://research.adobe.com;https://brain.google.com", "aff_unique_abbr": "UM;Adobe;Google Brain", "aff_campus_unique_index": "0;0;0+2", "aff_campus_unique": "Ann Arbor;;Mountain View", "aff_country_unique_index": "0;0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Phased Exploration with Greedy Exploitation in Stochastic Combinatorial Partial Monitoring Games", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7029", "id": "7029", "author_site": "Sougata Chaudhuri, Ambuj Tewari", "author": "Sougata Chaudhuri; Ambuj Tewari", "abstract": "Partial monitoring games are repeated games where the learner receives feedback that might be different from adversary's move or even the reward gained by the learner. Recently, a general model of combinatorial partial monitoring (CPM) games was proposed \\cite{lincombinatorial2014}, where the learner's action space can be exponentially large and adversary samples its moves from a bounded, continuous space, according to a fixed distribution. The paper gave a confidence bound based algorithm (GCB) that achieves $O(T^{2/3}\\log T)$ distribution independent and $O(\\log T)$ distribution dependent regret bounds. The implementation of their algorithm depends on two separate offline oracles and the distribution dependent regret additionally requires existence of a unique optimal action for the learner. Adopting their CPM model, our first contribution is a Phased Exploration with Greedy Exploitation (PEGE) algorithmic framework for the problem. 
Different algorithms within the framework achieve $O(T^{2/3}\\sqrt{\\log T})$ distribution independent and $O(\\log^2 T)$ distribution dependent regret respectively. Crucially, our framework needs only the simpler ``argmax'' oracle from GCB and the distribution dependent regret does not require existence of a unique optimal action. Our second contribution is another algorithm, PEGE2, which combines gap estimation with a PEGE algorithm, to achieve an $O(\\log T)$ regret bound, matching the GCB guarantee but removing the dependence on size of the learner's action space. However, like GCB, PEGE2 requires access to both offline oracles and the existence of a unique optimal action. Finally, we discuss how our algorithm can be efficiently applied to a CPM problem of practical interest: namely, online ranking with feedback at the top.", "bibtex": "@inproceedings{NIPS2016_b51a15f3,\n author = {Chaudhuri, Sougata and Tewari, Ambuj},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Phased Exploration with Greedy Exploitation in Stochastic Combinatorial Partial Monitoring Games},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b51a15f382ac914391a58850ab343b00-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b51a15f382ac914391a58850ab343b00-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b51a15f382ac914391a58850ab343b00-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b51a15f382ac914391a58850ab343b00-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b51a15f382ac914391a58850ab343b00-Reviews.html", "metareview": "", "pdf_size": 274175, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11884753281330225578&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Statistics, University of Michigan Ann Arbor; Department of Statistics and Department of EECS, University of Michigan Ann Arbor", "aff_domain": "umich.edu;umich.edu", "email": "umich.edu;umich.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b51a15f382ac914391a58850ab343b00-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "Department of Statistics", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Phased LSTM: Accelerating Recurrent Network Training for Long or Event-based Sequences", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7222", "id": "7222", "author_site": "Daniel Neil, Michael Pfeiffer, Shih-Chii Liu", "author": "Daniel Neil; Michael Pfeiffer; Shih-Chii Liu", "abstract": "Recurrent Neural Networks (RNNs) have become the state-of-the-art choice for extracting patterns from temporal sequences. Current RNN models are ill suited to process irregularly sampled data triggered by events generated in continuous time by sensors or other neurons. 
Such data can occur, for example, when the input comes from novel event-driven artificial sensors which generate sparse, asynchronous streams of events or from multiple conventional sensors with different update intervals. In this work, we introduce the Phased LSTM model, which extends the LSTM unit by adding a new time gate. This gate is controlled by a parametrized oscillation with a frequency range that requires updates of the memory cell only during a small percentage of the cycle. Even with the sparse updates imposed by the oscillation, the Phased LSTM network achieves faster convergence than regular LSTMs on tasks which require learning of long sequences. The model naturally integrates inputs from sensors of arbitrary sampling rates, thereby opening new areas of investigation for processing asynchronous sensory events that carry timing information. It also greatly improves the performance of LSTMs in standard RNN applications, and does so with an order-of-magnitude fewer computes.", "bibtex": "@inproceedings{NIPS2016_5bce843d,\n author = {Neil, Daniel and Pfeiffer, Michael and Liu, Shih-Chii},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Phased LSTM: Accelerating Recurrent Network Training for Long or Event-based Sequences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5bce843dd76db8c939d5323dd3e54ec9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5bce843dd76db8c939d5323dd3e54ec9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5bce843dd76db8c939d5323dd3e54ec9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5bce843dd76db8c939d5323dd3e54ec9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5bce843dd76db8c939d5323dd3e54ec9-Reviews.html", "metareview": "", "pdf_size": 1135528, "gs_citation": 604, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4526199262641685887&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5bce843dd76db8c939d5323dd3e54ec9-Abstract.html" }, { "title": "Poisson-Gamma dynamical systems", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7424", "id": "7424", "author_site": "Aaron Schein, Hanna Wallach, Mingyuan Zhou", "author": "Aaron Schein; Hanna Wallach; Mingyuan Zhou", "abstract": "This paper presents a dynamical system based on the Poisson-Gamma construction for sequentially observed multivariate count data. Inherent to the model is a novel Bayesian nonparametric prior that ties and shrinks parameters in a powerful way. We develop theory about the model's infinite limit and its steady-state. The model's inductive bias is demonstrated on a variety of real-world datasets where it is shown to learn interpretable structure and have superior predictive performance.", "bibtex": "@inproceedings{NIPS2016_8169e05e,\n author = {Schein, Aaron and Wallach, Hanna and Zhou, Mingyuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Poisson-Gamma dynamical systems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8169e05e2a0debcb15458f2cc1eff0ea-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8169e05e2a0debcb15458f2cc1eff0ea-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8169e05e2a0debcb15458f2cc1eff0ea-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8169e05e2a0debcb15458f2cc1eff0ea-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8169e05e2a0debcb15458f2cc1eff0ea-Reviews.html", "metareview": "", "pdf_size": 615701, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12902281669484445460&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst; McCombs School of Business, The University of Texas at Austin; Microsoft Research New York", "aff_domain": "cs.umass.edu;mccombs.utexas.edu;dirichlet.net", "email": "cs.umass.edu;mccombs.utexas.edu;dirichlet.net", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8169e05e2a0debcb15458f2cc1eff0ea-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Massachusetts Amherst;University of Texas at Austin;Microsoft", "aff_unique_dep": "College of Information and Computer Sciences;McCombs School of Business;Microsoft Research", "aff_unique_url": "https://www.umass.edu;https://www.mccombs.utexas.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-new-york", "aff_unique_abbr": "UMass Amherst;UT Austin;MSR NY", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Amherst;Austin;New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Preference Completion from Partial Rankings", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7260", "id": "7260", "author_site": "Suriya Gunasekar, Sanmi Koyejo, Joydeep Ghosh", "author": "Suriya Gunasekar; Oluwasanmi O Koyejo; Joydeep Ghosh", "abstract": "We propose a novel and efficient algorithm for the collaborative preference completion problem, which involves jointly estimating individualized rankings for a set of entities over a shared set of items, based on a limited number of observed affinity values. Our approach exploits the observation that while preferences are often recorded as numerical scores, the predictive quantity of interest is the underlying rankings. Thus, attempts to closely match the recorded scores may lead to overfitting and impair generalization performance. Instead, we propose an estimator that directly fits the underlying preference order, combined with nuclear norm constraints to encourage low-rank parameters. Besides (approximate) correctness of the ranking order, the proposed estimator makes no generative assumption on the numerical scores of the observations. One consequence is that the proposed estimator can fit any consistent partial ranking over a subset of the items represented as a directed acyclic graph (DAG), generalizing standard techniques that can only fit preference scores. 
Despite this generality, for supervision representing total or blockwise total orders, the computational complexity of our algorithm is within a $\\log$ factor of the standard algorithms for nuclear norm regularization based estimates for matrix completion. We further show promising empirical results for a novel and challenging application of collaborative ranking of the associations between brain regions and cognitive neuroscience terms.", "bibtex": "@inproceedings{NIPS2016_872488f8,\n author = {Gunasekar, Suriya and Koyejo, Oluwasanmi O and Ghosh, Joydeep},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Preference Completion from Partial Rankings},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/872488f88d1b2db54d55bc8bba2fad1b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/872488f88d1b2db54d55bc8bba2fad1b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/872488f88d1b2db54d55bc8bba2fad1b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/872488f88d1b2db54d55bc8bba2fad1b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/872488f88d1b2db54d55bc8bba2fad1b-Reviews.html", "metareview": "", "pdf_size": 444820, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17556883143786866735&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Texas, Austin, TX, USA; University of Illinois, Urbana-Champaign, IL, USA; University of Texas, Austin, TX, USA", "aff_domain": "utexas.edu;illinois.edu;ece.utexas.edu", "email": "utexas.edu;illinois.edu;ece.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/872488f88d1b2db54d55bc8bba2fad1b-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://illinois.edu", "aff_unique_abbr": "UT Austin;UIUC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Austin;Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Privacy Odometers and Filters: Pay-as-you-Go Composition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7262", "id": "7262", "author_site": "Ryan Rogers, Salil Vadhan, Aaron Roth, Jonathan Ullman", "author": "Ryan M Rogers; Aaron Roth; Jonathan Ullman; Salil Vadhan", "abstract": "In this paper we initiate the study of adaptive composition in differential privacy when the length of the composition and the privacy parameters themselves can be chosen adaptively, as a function of the outcome of previously run analyses. This case is much more delicate than the setting covered by existing composition theorems, in which the algorithms themselves can be chosen adaptively, but the privacy parameters must be fixed up front. Indeed, it isn't even clear how to define differential privacy in the adaptive parameter setting. We proceed by defining two objects which cover the two main use cases of composition theorems. A privacy filter is a stopping time rule that allows an analyst to halt a computation before his pre-specified privacy budget is exceeded. 
A privacy odometer allows the analyst to track realized privacy loss as he goes, without needing to pre-specify a privacy budget. We show that, unlike the case in which privacy parameters are fixed, in the adaptive parameter setting, these two use cases are distinct. We show that there exist privacy filters with bounds comparable (up to constants) to existing privacy composition theorems. We also give a privacy odometer that nearly matches non-adaptive private composition theorems, but is sometimes worse by a small asymptotic factor. Moreover, we show that this is inherent, and that any valid privacy odometer in the adaptive parameter setting must lose this factor, which shows a formal separation between the filter and odometer use-cases.", "bibtex": "@inproceedings{NIPS2016_58c54802,\n author = {Rogers, Ryan M and Roth, Aaron and Ullman, Jonathan and Vadhan, Salil},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Privacy Odometers and Filters: Pay-as-you-Go Composition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/58c54802a9fb9526cd0923353a34a7ae-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/58c54802a9fb9526cd0923353a34a7ae-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/58c54802a9fb9526cd0923353a34a7ae-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/58c54802a9fb9526cd0923353a34a7ae-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/58c54802a9fb9526cd0923353a34a7ae-Reviews.html", "metareview": "", "pdf_size": 331112, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4194405406884595505&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 16, "aff": "Department of Applied Mathematics and Computational Science, University of Pennsylvania; Department of Computer and Information Sciences, University of Pennsylvania; College of Computer and Information Science, Northeastern University; Center for Research on Computation & Society and John A. Paulson School of Engineering & Applied Sciences, Harvard University", "aff_domain": "sas.upenn.edu;cis.upenn.edu;ccs.neu.edu;seas.harvard.edu", "email": "sas.upenn.edu;cis.upenn.edu;ccs.neu.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/58c54802a9fb9526cd0923353a34a7ae-Abstract.html", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Pennsylvania;Northeastern University;Harvard University", "aff_unique_dep": "Department of Applied Mathematics and Computational Science;College of Computer and Information Science;Center for Research on Computation & Society", "aff_unique_url": "https://www.upenn.edu;https://www.northeastern.edu;https://www.harvard.edu", "aff_unique_abbr": "UPenn;NU;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Probabilistic Inference with Generating Functions for Poisson Latent Variable Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7035", "id": "7035", "author_site": "Kevin Winner, Daniel Sheldon", "author": "Kevin Winner; Daniel R. 
Sheldon", "abstract": "Graphical models with latent count variables arise in a number of fields. Standard exact inference techniques such as variable elimination and belief propagation do not apply to these models because the latent variables have countably infinite support. As a result, approximations such as truncation or MCMC are employed. We present the first exact inference algorithms for a class of models with latent count variables by developing a novel representation of countably infinite factors as probability generating functions, and then performing variable elimination with generating functions. Our approach is exact, runs in pseudo-polynomial time, and is much faster than existing approximate techniques. It leads to better parameter estimates for problems in population ecology by avoiding error introduced by approximate likelihood computations.", "bibtex": "@inproceedings{NIPS2016_6c1da886,\n author = {Winner, Kevin and Sheldon, Daniel R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probabilistic Inference with Generating Functions for Poisson Latent Variable Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6c1da886822c67822bcf3679d04369fa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6c1da886822c67822bcf3679d04369fa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6c1da886822c67822bcf3679d04369fa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6c1da886822c67822bcf3679d04369fa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6c1da886822c67822bcf3679d04369fa-Reviews.html", "metareview": "", "pdf_size": 888791, "gs_citation": 9, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16967251853782645856&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "College of Information and Computer Sciences, University of Massachusetts Amherst; College of Information and Computer Sciences, University of Massachusetts Amherst + Department of Computer Science, Mount Holyoke College", "aff_domain": "cs.umass.edu;cs.umass.edu", "email": "cs.umass.edu;cs.umass.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6c1da886822c67822bcf3679d04369fa-Abstract.html", "aff_unique_index": "0;0+1", "aff_unique_norm": "University of Massachusetts Amherst;Mount Holyoke College", "aff_unique_dep": "College of Information and Computer Sciences;Department of Computer Science", "aff_unique_url": "https://www.umass.edu;https://www.mtholyoke.edu", "aff_unique_abbr": "UMass Amherst;MHC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0+0", "aff_country_unique": "United States" }, { "title": "Probabilistic Linear Multistep Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6973", "id": "6973", "author_site": "Onur Teymur, Kostas Zygalakis, Ben Calderhead", "author": "Onur Teymur; Kostas Zygalakis; Ben Calderhead", "abstract": "We present a derivation and theoretical investigation of the Adams-Bashforth and Adams-Moulton family of linear multistep methods for solving ordinary differential equations, starting from a Gaussian process (GP) framework. 
In the limit, this formulation coincides with the classical deterministic methods, which have been used as higher-order initial value problem solvers for over a century. Furthermore, the natural probabilistic framework provided by the GP formulation allows us to derive probabilistic versions of these methods, in the spirit of a number of other probabilistic ODE solvers presented in the recent literature. In contrast to higher-order Runge-Kutta methods, which require multiple intermediate function evaluations per step, Adams family methods make use of previous function evaluations, so that increased accuracy arising from a higher-order multistep approach comes at very little additional computational cost. We show that through a careful choice of covariance function for the GP, the posterior mean and standard deviation over the numerical solution can be made to exactly coincide with the value given by the deterministic method and its local truncation error respectively. We provide a rigorous proof of the convergence of these new methods, as well as an empirical investigation (up to fifth order) demonstrating their convergence rates in practice.", "bibtex": "@inproceedings{NIPS2016_23c97e9c,\n author = {Teymur, Onur and Zygalakis, Kostas and Calderhead, Ben},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probabilistic Linear Multistep Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/23c97e9cb93576e45d2feaf00d0e8502-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/23c97e9cb93576e45d2feaf00d0e8502-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/23c97e9cb93576e45d2feaf00d0e8502-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/23c97e9cb93576e45d2feaf00d0e8502-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/23c97e9cb93576e45d2feaf00d0e8502-Reviews.html", "metareview": "", "pdf_size": 1732617, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15018909525618484922&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Mathematics, Imperial College London; School of Mathematics, University of Edinburgh; Department of Mathematics, Imperial College London", "aff_domain": "teymur.uk;ed.ac.uk;imperial.ac.uk", "email": "teymur.uk;ed.ac.uk;imperial.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/23c97e9cb93576e45d2feaf00d0e8502-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Imperial College London;University of Edinburgh", "aff_unique_dep": "Department of Mathematics;School of Mathematics", "aff_unique_url": "https://www.imperial.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "Imperial;Edinburgh", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "London;Edinburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Probing the Compositionality of Intuitive Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7024", "id": "7024", "author_site": "Eric Schulz, Josh Tenenbaum, David Duvenaud, Maarten Speekenbrink, Samuel J Gershman", "author": "Eric Schulz; Josh Tenenbaum; David K. 
Duvenaud; Maarten Speekenbrink; Samuel J Gershman", "abstract": "How do people learn about complex functional structure? Taking inspiration from other areas of cognitive science, we propose that this is accomplished by harnessing compositionality: complex structure is decomposed into simpler building blocks. We formalize this idea within the framework of Bayesian regression using a grammar over Gaussian process kernels. We show that participants prefer compositional over non-compositional function extrapolations, that samples from the human prior over functions are best described by a compositional model, and that people perceive compositional functions as more predictable than their non-compositional but otherwise similar counterparts. We argue that the compositional nature of intuitive functions is consistent with broad principles of human cognition.", "bibtex": "@inproceedings{NIPS2016_49ad23d1,\n author = {Schulz, Eric and Tenenbaum, Josh and Duvenaud, David K and Speekenbrink, Maarten and Gershman, Samuel J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Probing the Compositionality of Intuitive Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Reviews.html", "metareview": "", "pdf_size": 522405, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=259396779593010692&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 16, "aff": "University College London; MIT; University of Toronto; University College London; Harvard University", "aff_domain": "cs.ucl.ac.uk;mit.edu;cs.toronto.edu;ucl.ac.uk;fas.harvard.edu", "email": "cs.ucl.ac.uk;mit.edu;cs.toronto.edu;ucl.ac.uk;fas.harvard.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/49ad23d1ec9fa4bd8d77d02681df5cfa-Abstract.html", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "University College London;Massachusetts Institute of Technology;University of Toronto;Harvard University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucl.ac.uk;https://web.mit.edu;https://www.utoronto.ca;https://www.harvard.edu", "aff_unique_abbr": "UCL;MIT;U of T;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "United Kingdom;United States;Canada" }, { "title": "Professor Forcing: A New Algorithm for Training Recurrent Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7377", "id": "7377", "author_site": "Alex M Lamb, Anirudh Goyal, Ying Zhang, Saizheng Zhang, Aaron Courville, Yoshua Bengio", "author": "Alex M Lamb; Anirudh Goyal ALIAS PARTH GOYAL; Ying Zhang; Saizheng Zhang; Aaron C. 
Courville; Yoshua Bengio", "abstract": "The Teacher Forcing algorithm trains recurrent networks by supplying observed sequence values as inputs during training and using the network\u2019s own one-step-ahead predictions to do multi-step sampling. We introduce the Professor Forcing algorithm, which uses adversarial domain adaptation to encourage the dynamics of the recurrent network to be the same when training the network and when sampling from the network over multiple time steps. We apply Professor Forcing to language modeling, vocal synthesis on raw waveforms, handwriting generation, and image generation. Empirically we find that Professor Forcing acts as a regularizer, improving test likelihood on character level Penn Treebank and sequential MNIST. We also find that the model qualitatively improves samples, especially when sampling for a large number of time steps. This is supported by human evaluation of sample quality. Trade-offs between Professor Forcing and Scheduled Sampling are discussed. We produce T-SNEs showing that Professor Forcing successfully makes the dynamics of the network during training and sampling more similar.", "bibtex": "@inproceedings{NIPS2016_16026d60,\n author = {Lamb, Alex M and ALIAS PARTH GOYAL, Anirudh Goyal and Zhang, Ying and Zhang, Saizheng and Courville, Aaron C and Bengio, Yoshua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Professor Forcing: A New Algorithm for Training Recurrent Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/16026d60ff9b54410b3435b403afd226-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/16026d60ff9b54410b3435b403afd226-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/16026d60ff9b54410b3435b403afd226-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/16026d60ff9b54410b3435b403afd226-Reviews.html", "metareview": "", "pdf_size": 497900, "gs_citation": 805, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17198780094986434106&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al; MILA, Universit\u00e9 de Montr\u00e9al; CIFAR", "aff_domain": "gmail.com;gmail.com;gmail.com;gmail.com;gmail.com;gmail.com", "email": "gmail.com;gmail.com;gmail.com;gmail.com;gmail.com;gmail.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/16026d60ff9b54410b3435b403afd226-Abstract.html", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Canadian Institute for Advanced Research", "aff_unique_dep": "MILA;", "aff_unique_url": "https://www.umontreal.ca;https://www.cifar.ca", "aff_unique_abbr": "UdeM;CIFAR", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Montr\u00e9al;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Protein contact prediction from amino acid co-evolution using convolutional networks for graph-valued images", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7421", "id": "7421", "author_site": "Vladimir Golkov, Marcin Skwark, Antonij Golkov, Alexey Dosovitskiy, 
Thomas Brox, Jens Meiler, Daniel Cremers", "author": "Vladimir Golkov; Marcin J Skwark; Antonij Golkov; Alexey Dosovitskiy; Thomas Brox; Jens Meiler; Daniel Cremers", "abstract": "Proteins are the \"building blocks of life\", the most abundant organic molecules, and the central focus of most areas of biomedicine. Protein structure is strongly related to protein function, and thus structure prediction is a crucial task on the way to solving many biological questions. A contact map is a compact representation of the three-dimensional structure of a protein via the pairwise contacts between the amino acids constituting the protein. We use a convolutional network to calculate protein contact maps from inferred statistical coupling between positions in the protein sequence. The input to the network has an image-like structure amenable to convolutions, but every \"pixel\" contains a bipartite undirected edge-weighted graph instead of color channels. We propose several methods for treating such \"graph-valued images\" in a convolutional network. The proposed method outperforms state-of-the-art methods by a large margin. It also allows for great flexibility with regard to the input data, which makes it useful for studying a wide range of problems.", "bibtex": "@inproceedings{NIPS2016_2cad8fa4,\n author = {Golkov, Vladimir and Skwark, Marcin J and Golkov, Antonij and Dosovitskiy, Alexey and Brox, Thomas and Meiler, Jens and Cremers, Daniel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Protein contact prediction from amino acid co-evolution using convolutional networks for graph-valued images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2cad8fa47bbef282badbb8de5374b894-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2cad8fa47bbef282badbb8de5374b894-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2cad8fa47bbef282badbb8de5374b894-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2cad8fa47bbef282badbb8de5374b894-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2cad8fa47bbef282badbb8de5374b894-Reviews.html", "metareview": "", "pdf_size": 2781082, "gs_citation": 53, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14342248518437608609&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Technical University of Munich; Vanderbilt University; University of Augsburg; University of Freiburg; University of Freiburg; Vanderbilt University; Technical University of Munich", "aff_domain": "cs.tum.edu;skwark.pl;student.uni-augsburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;vanderbilt.edu;tum.de", "email": "cs.tum.edu;skwark.pl;student.uni-augsburg.de;cs.uni-freiburg.de;cs.uni-freiburg.de;vanderbilt.edu;tum.de", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2cad8fa47bbef282badbb8de5374b894-Abstract.html", "aff_unique_index": "0;1;2;3;3;1;0", "aff_unique_norm": "Technical University of Munich;Vanderbilt University;University of Augsburg;University of Freiburg", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.tum.de;https://www.vanderbilt.edu;https://www.uni-augsburg.de;https://www.uni-freiburg.de", "aff_unique_abbr": "TUM;Vanderbilt;UOA;UoF", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;0;0;0;1;0", "aff_country_unique": "Germany;United States" }, { "title": "Provable Efficient Online Matrix Completion via Non-convex Stochastic Gradient Descent", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7390", "id": "7390", "author_site": "Chi Jin, Sham Kakade, Praneeth Netrapalli", "author": "Chi Jin; Sham M. Kakade; Praneeth Netrapalli", "abstract": "Matrix completion, where we wish to recover a low rank matrix by observing a few entries from it, is a widely studied problem in both theory and practice with wide applications. Most of the provable algorithms so far on this problem have been restricted to the offline setting where they provide an estimate of the unknown matrix using all observations simultaneously. However, in many applications, the online version, where we observe one entry at a time and dynamically update our estimate, is more appealing. While existing algorithms are efficient for the offline setting, they could be highly inefficient for the online setting. In this paper, we propose the first provable, efficient online algorithm for matrix completion. Our algorithm starts from an initial estimate of the matrix and then performs non-convex stochastic gradient descent (SGD). After every observation, it performs a fast update involving only one row of two tall matrices, giving near linear total runtime. Our algorithm can be naturally used in the offline setting as well, where it gives competitive sample complexity and runtime to state of the art algorithms. Our proofs introduce a general framework to show that SGD updates tend to stay away from saddle surfaces and could be of broader interests to other non-convex problems.", "bibtex": "@inproceedings{NIPS2016_38651c44,\n author = {Jin, Chi and Kakade, Sham M and Netrapalli, Praneeth},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Provable Efficient Online Matrix Completion via Non-convex Stochastic Gradient Descent},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/38651c4450f87348fcbe1f992746a954-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/38651c4450f87348fcbe1f992746a954-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/38651c4450f87348fcbe1f992746a954-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/38651c4450f87348fcbe1f992746a954-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/38651c4450f87348fcbe1f992746a954-Reviews.html", "metareview": "", "pdf_size": 287529, "gs_citation": 122, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7629313754271544655&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "UC Berkeley; University of Washington; Microsoft Research India", "aff_domain": "cs.berkeley.edu;cs.washington.edu;microsoft.com", "email": "cs.berkeley.edu;cs.washington.edu;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/38651c4450f87348fcbe1f992746a954-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;University of Washington;Microsoft", "aff_unique_dep": ";;Microsoft Research India", "aff_unique_url": "https://www.berkeley.edu;https://www.washington.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-india", "aff_unique_abbr": "UC Berkeley;UW;MSR India", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;India" }, { "title": "Proximal Deep Structured Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7308", "id": "7308", "author_site": "Shenlong Wang, Sanja Fidler, Raquel Urtasun", "author": "Shenlong Wang; Sanja Fidler; Raquel Urtasun", "abstract": "Many problems in real-world applications involve predicting continuous-valued random variables that are statistically related. In this paper, we propose a powerful deep structured model that is able to learn complex non-linear functions which encode the dependencies between continuous output variables. We show that inference in our model using proximal methods can be efficiently solved as a feed-forward pass of a special type of deep recurrent neural network. We demonstrate the effectiveness of our approach in the tasks of image denoising, depth refinement and optical flow estimation.", "bibtex": "@inproceedings{NIPS2016_f4be0027,\n author = {Wang, Shenlong and Fidler, Sanja and Urtasun, Raquel},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Proximal Deep Structured Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f4be00279ee2e0a53eafdaa94a151e2c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f4be00279ee2e0a53eafdaa94a151e2c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f4be00279ee2e0a53eafdaa94a151e2c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f4be00279ee2e0a53eafdaa94a151e2c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f4be00279ee2e0a53eafdaa94a151e2c-Reviews.html", "metareview": "", "pdf_size": 8903440, "gs_citation": 90, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1637504371661723490&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 5, "aff": "University of Toronto; University of Toronto; University of Toronto", "aff_domain": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f4be00279ee2e0a53eafdaa94a151e2c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "title": "Proximal Stochastic Methods for Nonsmooth Nonconvex Finite-Sum Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6920", "id": "6920", "author_site": "Sashank J. Reddi, Suvrit Sra, Barnabas Poczos, Alexander Smola", "author": "Sashank J. Reddi; Suvrit Sra; Barnabas Poczos; Alexander J Smola", "abstract": "We analyze stochastic algorithms for optimizing nonconvex, nonsmooth finite-sum problems, where the nonsmooth part is convex. Surprisingly, unlike the smooth case, our knowledge of this fundamental problem is very limited. For example, it is not known whether the proximal stochastic gradient method with a constant minibatch converges to a stationary point. To tackle this issue, we develop fast stochastic algorithms that provably converge to a stationary point for constant minibatches. Furthermore, using a variant of these algorithms, we obtain provably faster convergence than batch proximal gradient descent. Our results are based on recent variance reduction techniques for convex optimization but with a novel analysis for handling nonconvex and nonsmooth functions. We also prove a global linear convergence rate for an interesting subclass of nonsmooth nonconvex functions, which subsumes several recent works.", "bibtex": "@inproceedings{NIPS2016_291597a1,\n author = {J. Reddi, Sashank and Sra, Suvrit and Poczos, Barnabas and Smola, Alexander J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Proximal Stochastic Methods for Nonsmooth Nonconvex Finite-Sum Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/291597a100aadd814d197af4f4bab3a7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/291597a100aadd814d197af4f4bab3a7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/291597a100aadd814d197af4f4bab3a7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/291597a100aadd814d197af4f4bab3a7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/291597a100aadd814d197af4f4bab3a7-Reviews.html", "metareview": "", "pdf_size": 527373, "gs_citation": 268, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3155178725630404259&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Carnegie Mellon University; Massachusetts Institute of Technology; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;mit.edu;cs.cmu.edu;smola.org", "email": "cs.cmu.edu;mit.edu;cs.cmu.edu;smola.org", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/291597a100aadd814d197af4f4bab3a7-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://web.mit.edu", "aff_unique_abbr": "CMU;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Pruning Random Forests for Prediction on a Budget", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7378", "id": "7378", "author_site": "Feng Nan, Joseph Wang, Venkatesh Saligrama", "author": "Feng Nan; Joseph Wang; Venkatesh Saligrama", "abstract": "We propose to prune a random forest (RF) for resource-constrained prediction. We first construct an RF and then prune it to optimize expected feature cost & accuracy. We pose pruning RFs as a novel 0-1 integer program with linear constraints that encourages feature re-use. We establish total unimodularity of the constraint set to prove that the corresponding LP relaxation solves the original integer program. We then exploit connections to combinatorial optimization and develop an efficient primal-dual algorithm, scalable to large datasets. In contrast to our bottom-up approach, which benefits from good RF initialization, conventional methods are top-down, acquiring features based on their utility value, and are generally intractable, requiring heuristics. Empirically, our pruning algorithm outperforms existing state-of-the-art resource-constrained algorithms.", "bibtex": "@inproceedings{NIPS2016_3948ead6,\n author = {Nan, Feng and Wang, Joseph and Saligrama, Venkatesh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Pruning Random Forests for Prediction on a Budget},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3948ead63a9f2944218de038d8934305-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3948ead63a9f2944218de038d8934305-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3948ead63a9f2944218de038d8934305-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3948ead63a9f2944218de038d8934305-Reviews.html", "metareview": "", "pdf_size": 286576, "gs_citation": 102, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11738744555940599350&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Systems Engineering; Electrical Engineering; Electrical Engineering", "aff_domain": "bu.edu;bu.edu;bu.edu", "email": "bu.edu;bu.edu;bu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3948ead63a9f2944218de038d8934305-Abstract.html", "aff_unique_index": "0;1;1", "aff_unique_norm": "Systems Engineering;Electrical Engineering Department", "aff_unique_dep": "Department of Systems Engineering;Electrical Engineering", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Quantized Random Projections and Non-Linear Estimation of Cosine Similarity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7018", "id": "7018", "author_site": "Ping Li, Michael Mitzenmacher, Martin Slawski", "author": "Ping Li; Michael Mitzenmacher; Martin Slawski", "abstract": "Random projections constitute a simple, yet effective technique for dimensionality reduction with applications in learning and search problems. In the present paper, we consider the problem of estimating cosine similarities when the projected data undergo scalar quantization to $b$ bits. We here argue that the maximum likelihood estimator (MLE) is a principled approach to deal with the non-linearity resulting from quantization, and subsequently study its computational and statistical properties. A specific focus is on the trade-off between bit depth and the number of projections given a fixed budget of bits for storage or transmission. Along the way, we also touch upon the existence of a qualitative counterpart to the Johnson-Lindenstrauss lemma in the presence of quantization.", "bibtex": "@inproceedings{NIPS2016_186a157b,\n author = {Li, Ping and Mitzenmacher, Michael and Slawski, Martin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Quantized Random Projections and Non-Linear Estimation of Cosine Similarity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/186a157b2992e7daed3677ce8e9fe40f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/186a157b2992e7daed3677ce8e9fe40f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/186a157b2992e7daed3677ce8e9fe40f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/186a157b2992e7daed3677ce8e9fe40f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/186a157b2992e7daed3677ce8e9fe40f-Reviews.html", "metareview": "", "pdf_size": 668885, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16594780298069206805&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Rutgers University; Harvard University; Rutgers University", "aff_domain": "stat.rutgers.edu;eecs.harvard.edu;rutgers.edu", "email": "stat.rutgers.edu;eecs.harvard.edu;rutgers.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/186a157b2992e7daed3677ce8e9fe40f-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Rutgers University;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rutgers.edu;https://www.harvard.edu", "aff_unique_abbr": "Rutgers;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Quantum Perceptron Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7169", "id": "7169", "author_site": "Ashish Kapoor, Nathan Wiebe, Krysta Svore", "author": "Ashish Kapoor; Nathan Wiebe; Krysta Svore", "abstract": "We demonstrate how quantum computation can provide non-trivial improvements in the computational and statistical complexity of the perceptron model. We develop two quantum algorithms for perceptron learning. The first algorithm exploits quantum information processing to determine a separating hyperplane using a number of steps sublinear in the number of data points $N$, namely $O(\\sqrt{N})$. The second algorithm illustrates how the classical mistake bound of $O(\\frac{1}{\\gamma^2})$ can be further improved to $O(\\frac{1}{\\sqrt{\\gamma}})$ through quantum means, where $\\gamma$ denotes the margin. Such improvements are achieved through the application of quantum amplitude amplification to the version space interpretation of the perceptron model.", "bibtex": "@inproceedings{NIPS2016_d47268e9,\n author = {Kapoor, Ashish and Wiebe, Nathan and Svore, Krysta},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Quantum Perceptron Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d47268e9db2e9aa3827bba3afb7ff94a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d47268e9db2e9aa3827bba3afb7ff94a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d47268e9db2e9aa3827bba3afb7ff94a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d47268e9db2e9aa3827bba3afb7ff94a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d47268e9db2e9aa3827bba3afb7ff94a-Reviews.html", "metareview": "", "pdf_size": 400109, "gs_citation": 212, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3920563829789798875&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff": "Microsoft Research; Microsoft Research; Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d47268e9db2e9aa3827bba3afb7ff94a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "R-FCN: Object Detection via Region-based Fully Convolutional Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7403", "id": "7403", "author_site": "jifeng dai, Yi Li, Kaiming He, Jian Sun", "author": "Jifeng Dai; Yi Li; Kaiming He; Jian Sun", "abstract": "We present region-based, fully convolutional networks for accurate and efficient object detection. In contrast to previous region-based detectors such as Fast/Faster R-CNN that apply a costly per-region subnetwork hundreds of times, our region-based detector is fully convolutional with almost all computation shared on the entire image. To achieve this goal, we propose position-sensitive score maps to address a dilemma between translation-invariance in image classification and translation-variance in object detection. Our method can thus naturally adopt fully convolutional image classifier backbones, such as the latest Residual Networks (ResNets), for object detection. We show competitive results on the PASCAL VOC datasets (e.g., 83.6% mAP on the 2007 set) with the 101-layer ResNet. Meanwhile, our result is achieved at a test-time speed of 170ms per image, 2.5-20 times faster than the Faster R-CNN counterpart. Code is made publicly available at: https://github.com/daijifeng001/r-fcn.", "bibtex": "@inproceedings{NIPS2016_577ef115,\n author = {Dai, Jifeng and Li, Yi and He, Kaiming and Sun, Jian},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {R-FCN: Object Detection via Region-based Fully Convolutional Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/577ef1154f3240ad5b9b413aa7346a1e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/577ef1154f3240ad5b9b413aa7346a1e-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/577ef1154f3240ad5b9b413aa7346a1e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/577ef1154f3240ad5b9b413aa7346a1e-Reviews.html", "metareview": "", "pdf_size": 645670, "gs_citation": 8281, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14880935744314366653&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/577ef1154f3240ad5b9b413aa7346a1e-Abstract.html" }, { "title": "RETAIN: An Interpretable Predictive Model for Healthcare using Reverse Time Attention Mechanism", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7125", "id": "7125", "author_site": "Edward Choi, Taha Bahadori, Jimeng Sun, Joshua Kulas, Andy Schuetz, Walter Stewart", "author": "Edward Choi; Mohammad Taha Bahadori; Jimeng Sun; Joshua Kulas; Andy Schuetz; Walter Stewart", "abstract": "Accuracy and interpretability are two dominant features of successful predictive models. Typically, a choice must be made in favor of complex black box models such as recurrent neural networks (RNN) for accuracy versus less accurate but more interpretable traditional models such as logistic regression. This tradeoff poses challenges in medicine where both accuracy and interpretability are important. We addressed this challenge by developing the REverse Time AttentIoN model (RETAIN) for application to Electronic Health Records (EHR) data. RETAIN achieves high accuracy while remaining clinically interpretable and is based on a two-level neural attention model that detects influential past visits and significant clinical variables within those visits (e.g. key diagnoses). RETAIN mimics physician practice by attending the EHR data in a reverse time order so that recent clinical visits are likely to receive higher attention. RETAIN was tested on a large health system EHR dataset with 14 million visits completed by 263K patients over an 8 year period and demonstrated predictive accuracy and computational scalability comparable to state-of-the-art methods such as RNN, and ease of interpretability comparable to traditional models.", "bibtex": "@inproceedings{NIPS2016_231141b3,\n author = {Choi, Edward and Bahadori, Mohammad Taha and Sun, Jimeng and Kulas, Joshua and Schuetz, Andy and Stewart, Walter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {RETAIN: An Interpretable Predictive Model for Healthcare using Reverse Time Attention Mechanism},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/231141b34c82aa95e48810a9d1b33a79-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/231141b34c82aa95e48810a9d1b33a79-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/231141b34c82aa95e48810a9d1b33a79-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/231141b34c82aa95e48810a9d1b33a79-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/231141b34c82aa95e48810a9d1b33a79-Reviews.html", "metareview": "", "pdf_size": 584116, "gs_citation": 1761, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12067026852472885249&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Georgia Institute of Technology; Georgia Institute of Technology; Georgia Institute of Technology; Sutter Health; Sutter Health; Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;sutterhealth.org;sutterhealth.org;cc.gatech.edu", "email": "gatech.edu;gatech.edu;gatech.edu;sutterhealth.org;sutterhealth.org;cc.gatech.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/231141b34c82aa95e48810a9d1b33a79-Abstract.html", "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Georgia Institute of Technology;Sutter Health", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.sutterhealth.org", "aff_unique_abbr": "Georgia Tech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Reconstructing Parameters of Spreading Models from Partial Observations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6930", "id": "6930", "author": "Andrey Lokhov", "abstract": "Spreading processes are often modelled as a stochastic dynamics occurring on top of a given network with edge weights corresponding to the transmission probabilities. Knowledge of veracious transmission probabilities is essential for prediction, optimization, and control of diffusion dynamics. Unfortunately, in most cases the transmission rates are unknown and need to be reconstructed from the spreading data. Moreover, in realistic settings it is impossible to monitor the state of each node at every time, and thus the data is highly incomplete. We introduce an efficient dynamic message-passing algorithm, which is able to reconstruct parameters of the spreading model given only partial information on the activation times of nodes in the network. The method is generalizable to a large class of dynamic models, as well as to the case of temporal graphs.", "bibtex": "@inproceedings{NIPS2016_404dcc91,\n author = {Lokhov, Andrey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reconstructing Parameters of Spreading Models from Partial Observations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/404dcc91b2aeaa7caa47487d1483e48a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/404dcc91b2aeaa7caa47487d1483e48a-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/404dcc91b2aeaa7caa47487d1483e48a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/404dcc91b2aeaa7caa47487d1483e48a-Reviews.html", "metareview": "", "pdf_size": 523239, "gs_citation": 45, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16194703749727201076&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Center for Nonlinear Studies and Theoretical Division T-4, Los Alamos National Laboratory, Los Alamos, NM 87545, USA", "aff_domain": "lanl.gov", "email": "lanl.gov", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/404dcc91b2aeaa7caa47487d1483e48a-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Los Alamos National Laboratory", "aff_unique_dep": "Center for Nonlinear Studies", "aff_unique_url": "https://www.lanl.gov", "aff_unique_abbr": "LANL", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Alamos", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Recovery Guarantee of Non-negative Matrix Factorization via Alternating Updates", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7307", "id": "7307", "author_site": "Yuanzhi Li, Yingyu Liang, Andrej Risteski", "author": "Yuanzhi Li; Yingyu Liang; Andrej Risteski", "abstract": "Non-negative matrix factorization is a popular tool for decomposing data into feature and weight matrices under non-negativity constraints. It enjoys practical success but is poorly understood theoretically. This paper proposes an algorithm that alternates between decoding the weights and updating the features, and shows that assuming a generative model of the data, it provably recovers the ground-truth under fairly mild conditions. In particular, its only essential requirement on features is linear independence. Furthermore, the algorithm uses ReLU to exploit the non-negativity for decoding the weights, and thus can tolerate adversarial noise that can potentially be as large as the signal, and can tolerate unbiased noise much larger than the signal. The analysis relies on a carefully designed coupling between two potential functions, which we believe is of independent interest.", "bibtex": "@inproceedings{NIPS2016_9657c1ff,\n author = {Li, Yuanzhi and Liang, Yingyu and Risteski, Andrej},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Recovery Guarantee of Non-negative Matrix Factorization via Alternating Updates},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9657c1fffd38824e5ab0472e022e577e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9657c1fffd38824e5ab0472e022e577e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9657c1fffd38824e5ab0472e022e577e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9657c1fffd38824e5ab0472e022e577e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9657c1fffd38824e5ab0472e022e577e-Reviews.html", "metareview": "", "pdf_size": 325292, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16061327726646184183&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 8, "aff": "Computer Science Department at Princeton University; Computer Science Department at Princeton University; Computer Science Department at Princeton University", "aff_domain": "cs.princeton.edu;cs.princeton.edu;cs.princeton.edu", "email": "cs.princeton.edu;cs.princeton.edu;cs.princeton.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9657c1fffd38824e5ab0472e022e577e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Princeton", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Refined Lower Bounds for Adversarial Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7357", "id": "7357", "author_site": "S\u00e9bastien Gerchinovitz, Tor Lattimore", "author": "S\u00e9bastien Gerchinovitz; Tor Lattimore", "abstract": "We provide new lower bounds on the regret that must be suffered by adversarial bandit algorithms. The new results show that recent upper bounds that either (a) hold with high-probability or (b) depend on the total loss of the best arm or (c) depend on the quadratic variation of the losses, are close to tight. Besides this we prove two impossibility results. First, the existence of a single arm that is optimal in every round cannot improve the regret in the worst case. Second, the regret cannot scale with the effective range of the losses. In contrast, both results are possible in the full-information setting.", "bibtex": "@inproceedings{NIPS2016_2f37d101,\n author = {Gerchinovitz, S\\'{e}bastien and Lattimore, Tor},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Refined Lower Bounds for Adversarial Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2f37d10131f2a483a8dd005b3d14b0d9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2f37d10131f2a483a8dd005b3d14b0d9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2f37d10131f2a483a8dd005b3d14b0d9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2f37d10131f2a483a8dd005b3d14b0d9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2f37d10131f2a483a8dd005b3d14b0d9-Reviews.html", "metareview": "", "pdf_size": 313541, "gs_citation": 61, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4401059348919523419&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Institut de Math\u00e9matiques de Toulouse, Universit\u00e9 Toulouse 3 Paul Sabatier, Toulouse, 31062, France; Department of Computing Science, University of Alberta, Edmonton, Canada", "aff_domain": "math.univ-toulouse.fr;gmail.com", "email": "math.univ-toulouse.fr;gmail.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2f37d10131f2a483a8dd005b3d14b0d9-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Toulouse 3 Paul Sabatier;University of Alberta", "aff_unique_dep": "Institut de Math\u00e9matiques de Toulouse;Department of Computing Science", "aff_unique_url": "https://www.univ-toulouse.fr;https://www.ualberta.ca", "aff_unique_abbr": "UT3;UAlberta", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Toulouse;Edmonton", "aff_country_unique_index": "0;1", "aff_country_unique": "France;Canada" }, { "title": "Regret Bounds for Non-decomposable Metrics with Missing Labels", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7338", "id": "7338", "author_site": "Nagarajan Natarajan, Prateek Jain", "author": "Nagarajan Natarajan; Prateek Jain", "abstract": "We consider the problem of recommending relevant labels (items) for a given data point (user). In particular, we are interested in the practically important setting where the evaluation is with respect to non-decomposable (over labels) performance metrics like the $F_1$ measure, \emph{and} training data has missing labels. To this end, we propose a generic framework that given a performance metric $\Psi$, can devise a regularized objective function and a threshold such that all the values in the predicted score vector above and only above the threshold are selected to be positive. We show that the regret or generalization error in the given metric $\Psi$ is bounded ultimately by estimation error of certain underlying parameters. In particular, we derive regret bounds under three popular settings: a) collaborative filtering, b) multilabel classification, and c) PU (positive-unlabeled) learning. For each of the above problems, we can obtain a precise non-asymptotic regret bound which is small even when a large fraction of labels is missing. 
Our empirical results on synthetic and benchmark datasets demonstrate that by explicitly modeling for missing labels and optimizing the desired performance metric, our algorithm indeed achieves significantly better performance (like $F_1$ score) when compared to methods that do not model missing label information carefully.", "bibtex": "@inproceedings{NIPS2016_2dffbc47,\n author = {Natarajan, Nagarajan and Jain, Prateek},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regret Bounds for Non-decomposable Metrics with Missing Labels},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2dffbc474aa176b6dc957938c15d0c8b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2dffbc474aa176b6dc957938c15d0c8b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2dffbc474aa176b6dc957938c15d0c8b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2dffbc474aa176b6dc957938c15d0c8b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2dffbc474aa176b6dc957938c15d0c8b-Reviews.html", "metareview": "", "pdf_size": 11551614, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13782556489438131261&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 3, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2dffbc474aa176b6dc957938c15d0c8b-Abstract.html" }, { "title": "Regret of Queueing Bandits", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8499", "id": "8499", "author_site": "Subhashini Krishnasamy, Rajat Sen, Ramesh Johari, Sanjay Shakkottai", "author": "Subhashini Krishnasamy; Rajat Sen; Ramesh Johari; Sanjay Shakkottai", "abstract": "We consider a variant of the multiarmed bandit problem where jobs queue for service, and service rates of different servers may be unknown. We study algorithms that minimize queue-regret: the (expected) difference between the queue-lengths obtained by the algorithm, and those obtained by a genie-aided matching algorithm that knows exact service rates. A naive view of this problem would suggest that queue-regret should grow logarithmically: since queue-regret cannot be larger than classical regret, results for the standard MAB problem give algorithms that ensure queue-regret increases no more than logarithmically in time. Our paper shows surprisingly more complex behavior. In particular, the naive intuition is correct as long as the bandit algorithm's queues have relatively long regenerative cycles: in this case queue-regret is similar to cumulative regret, and scales (essentially) logarithmically. However, we show that this \"early stage\" of the queueing bandit eventually gives way to a \"late stage\", where the optimal queue-regret scaling is O(1/t). We demonstrate an algorithm that (order-wise) achieves this asymptotic queue-regret, and also exhibits close to optimal switching time from the early stage to the late stage.", "bibtex": "@inproceedings{NIPS2016_430c3626,\n author = {Krishnasamy, Subhashini and Sen, Rajat and Johari, Ramesh and Shakkottai, Sanjay},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regret of Queueing Bandits},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/430c3626b879b4005d41b8a46172e0c0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/430c3626b879b4005d41b8a46172e0c0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/430c3626b879b4005d41b8a46172e0c0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/430c3626b879b4005d41b8a46172e0c0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/430c3626b879b4005d41b8a46172e0c0-Reviews.html", "metareview": "", "pdf_size": 2139340, "gs_citation": 80, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17994198469341135453&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/430c3626b879b4005d41b8a46172e0c0-Abstract.html" }, { "title": "Regularization With Stochastic Transformations and Perturbations for Deep Semi-Supervised Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6911", "id": "6911", "author_site": "Mehdi Sajjadi, Mehran Javanmardi, Tolga Tasdizen", "author": "Mehdi Sajjadi; Mehran Javanmardi; Tolga Tasdizen", "abstract": "Effective convolutional neural networks are trained on large sets of labeled data. However, creating large labeled datasets is a very costly and time-consuming task. Semi-supervised learning uses unlabeled data to train a model with higher accuracy when there is a limited set of labeled data available. In this paper, we consider the problem of semi-supervised learning with convolutional neural networks. Techniques such as randomized data augmentation, dropout and random max-pooling provide better generalization and stability for classifiers that are trained using gradient descent. Multiple passes of an individual sample through the network might lead to different predictions due to the non-deterministic behavior of these techniques. We propose an unsupervised loss function that takes advantage of the stochastic nature of these methods and minimizes the difference between the predictions of multiple passes of a training sample through the network. We evaluate the proposed method on several benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_30ef30b6,\n author = {Sajjadi, Mehdi and Javanmardi, Mehran and Tasdizen, Tolga},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regularization With Stochastic Transformations and Perturbations for Deep Semi-Supervised Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/30ef30b64204a3088a26bc2e6ecf7602-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/30ef30b64204a3088a26bc2e6ecf7602-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/30ef30b64204a3088a26bc2e6ecf7602-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/30ef30b64204a3088a26bc2e6ecf7602-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/30ef30b64204a3088a26bc2e6ecf7602-Reviews.html", "metareview": "", "pdf_size": 262174, "gs_citation": 1445, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=811577089436189841&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Electrical and Computer Engineering, University of Utah; Department of Electrical and Computer Engineering, University of Utah; Department of Electrical and Computer Engineering, University of Utah", "aff_domain": "sci.utah.edu;sci.utah.edu;sci.utah.edu", "email": "sci.utah.edu;sci.utah.edu;sci.utah.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/30ef30b64204a3088a26bc2e6ecf7602-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "Department of Electrical and Computer Engineering", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Regularized Nonlinear Acceleration", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7410", "id": "7410", "author_site": "Damien Scieur, Alexandre d'Aspremont, Francis Bach", "author": "Damien Scieur; Alexandre d'Aspremont; Francis Bach", "abstract": "We describe a convergence acceleration technique for generic optimization problems. Our scheme computes estimates of the optimum from a nonlinear average of the iterates produced by any optimization method. The weights in this average are computed via a simple and small linear system, whose solution can be updated online. This acceleration scheme runs in parallel to the base algorithm, providing improved estimates of the solution on the fly, while the original optimization method is running. Numerical experiments are detailed on classical classification problems.", "bibtex": "@inproceedings{NIPS2016_bbf94b34,\n author = {Scieur, Damien and d\\textquotesingle Aspremont, Alexandre and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Regularized Nonlinear Acceleration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bbf94b34eb32268ada57a3be5062fe7d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bbf94b34eb32268ada57a3be5062fe7d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bbf94b34eb32268ada57a3be5062fe7d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bbf94b34eb32268ada57a3be5062fe7d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bbf94b34eb32268ada57a3be5062fe7d-Reviews.html", "metareview": "", "pdf_size": 392871, "gs_citation": 177, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13830453205955709162&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 20, "aff": "INRIA & D.I., UMR 8548, \u00c9cole Normale Sup\u00e9rieure, Paris, France; CNRS & D.I., UMR 8548, \u00c9cole Normale Sup\u00e9rieure, Paris, France; INRIA & D.I., UMR 8548, \u00c9cole Normale Sup\u00e9rieure, Paris, France", "aff_domain": "inria.fr;di.ens.fr;inria.fr", "email": "inria.fr;di.ens.fr;inria.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bbf94b34eb32268ada57a3be5062fe7d-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "INRIA;\u00c9cole Normale Sup\u00e9rieure", "aff_unique_dep": "D.I., UMR 8548, \u00c9cole Normale Sup\u00e9rieure;UMR 8548", "aff_unique_url": "https://www.inria.fr;https://www.ens.fr", "aff_unique_abbr": "INRIA;ENS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Paris", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "Relevant sparse codes with variational information bottleneck", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7427", "id": "7427", "author_site": "Matthew Chalk, Olivier Marre, Gasper Tkacik", "author": "Matthew Chalk; Olivier Marre; Gasper Tkacik", "abstract": "In many applications, it is desirable to extract only the relevant aspects of data. A principled way to do this is the information bottleneck (IB) method, where one seeks a code that maximises information about a relevance variable, Y, while constraining the information encoded about the original data, X. Unfortunately however, the IB method is computationally demanding when data are high-dimensional and/or non-gaussian. Here we propose an approximate variational scheme for maximising a lower bound on the IB objective, analogous to variational EM. Using this method, we derive an IB algorithm to recover features that are both relevant and sparse. Finally, we demonstrate how kernelised versions of the algorithm can be used to address a broad range of problems with non-linear relation between X and Y.", "bibtex": "@inproceedings{NIPS2016_a89cf525,\n author = {Chalk, Matthew and Marre, Olivier and Tkacik, Gasper},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Relevant sparse codes with variational information bottleneck},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a89cf525e1d9f04d16ce31165e139a4b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a89cf525e1d9f04d16ce31165e139a4b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a89cf525e1d9f04d16ce31165e139a4b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a89cf525e1d9f04d16ce31165e139a4b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a89cf525e1d9f04d16ce31165e139a4b-Reviews.html", "metareview": "", "pdf_size": 1015469, "gs_citation": 98, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10985919014377816696&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "IST Austria; Institut de la Vision; IST Austria", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a89cf525e1d9f04d16ce31165e139a4b-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Science and Technology Austria;Institut de la Vision", "aff_unique_dep": ";", "aff_unique_url": "https://www.ist.ac.at;https://www.institutdelavision.org", "aff_unique_abbr": "IST Austria;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Austria;France" }, { "title": "Reshaped Wirtinger Flow for Solving Quadratic System of Equations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7147", "id": "7147", "author_site": "Huishuai Zhang, Yingbin Liang", "author": "Huishuai Zhang; Yingbin Liang", "abstract": "We study the problem of recovering a vector $\mathbf{x}\in \mathbb{R}^n$ from its magnitude measurements $y_i=|\langle \mathbf{a}_i, \mathbf{x}\rangle|, i=1,..., m$. Our work is along the line of the Wirtinger flow (WF) approach \citet{candes2015phase}, which solves the problem by minimizing a nonconvex loss function via a gradient algorithm and can be shown to converge to a global optimal point under good initialization. In contrast to the smooth loss function used in WF, we adopt a nonsmooth but lower-order loss function, and design a gradient-like algorithm (referred to as reshaped-WF). We show that for random Gaussian measurements, reshaped-WF enjoys geometric convergence to a global optimal point as long as the number $m$ of measurements is on the order of $\mathcal{O}(n)$, where $n$ is the dimension of the unknown $\mathbf{x}$. This improves the sample complexity of WF, and achieves the same sample complexity as truncated-WF \citet{chen2015solving} but without truncation at the gradient step. Furthermore, reshaped-WF costs less computationally than WF, and runs faster numerically than both WF and truncated-WF. Bypassing higher-order variables in the loss function and truncations in the gradient loop, analysis of reshaped-WF is simplified.", "bibtex": "@inproceedings{NIPS2016_83adc922,\n author = {Zhang, Huishuai and Liang, Yingbin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reshaped Wirtinger Flow for Solving Quadratic System of Equations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/83adc9225e4deb67d7ce42d58fe5157c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/83adc9225e4deb67d7ce42d58fe5157c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/83adc9225e4deb67d7ce42d58fe5157c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/83adc9225e4deb67d7ce42d58fe5157c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/83adc9225e4deb67d7ce42d58fe5157c-Reviews.html", "metareview": "", "pdf_size": 4514827, "gs_citation": 165, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6404923341108810219&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of EECS, Syracuse University; Department of EECS, Syracuse University", "aff_domain": "syr.edu;syr.edu", "email": "syr.edu;syr.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/83adc9225e4deb67d7ce42d58fe5157c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Syracuse University", "aff_unique_dep": "Department of EECS", "aff_unique_url": "https://www.syracuse.edu", "aff_unique_abbr": "Syracuse", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Residual Networks Behave Like Ensembles of Relatively Shallow Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7167", "id": "7167", "author_site": "Andreas Veit, Michael J Wilber, Serge Belongie", "author": "Andreas Veit; Michael J Wilber; Serge Belongie", "abstract": "In this work we propose a novel interpretation of residual networks showing that they can be seen as a collection of many paths of differing length. Moreover, residual networks seem to enable very deep networks by leveraging only the short paths during training. To support this observation, we rewrite residual networks as an explicit collection of paths. Unlike traditional models, paths through residual networks vary in length. Further, a lesion study reveals that these paths show ensemble-like behavior in the sense that they do not strongly depend on each other. Finally, and most surprising, most paths are shorter than one might expect, and only the short paths are needed during training, as longer paths do not contribute any gradient. For example, most of the gradient in a residual network with 110 layers comes from paths that are only 10-34 layers deep. Our results reveal one of the key characteristics that seem to enable the training of very deep networks: Residual networks avoid the vanishing gradient problem by introducing short paths which can carry gradient throughout the extent of very deep networks.", "bibtex": "@inproceedings{NIPS2016_37bc2f75,\n author = {Veit, Andreas and Wilber, Michael J and Belongie, Serge},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/37bc2f75bf1bcfe8450a1a41c200364c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/37bc2f75bf1bcfe8450a1a41c200364c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/37bc2f75bf1bcfe8450a1a41c200364c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/37bc2f75bf1bcfe8450a1a41c200364c-Reviews.html", "metareview": "", "pdf_size": 554369, "gs_citation": 1429, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17069814828377193048&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science & Cornell Tech; Department of Computer Science & Cornell Tech; Department of Computer Science & Cornell Tech", "aff_domain": "cornell.edu;cornell.edu;cornell.edu", "email": "cornell.edu;cornell.edu;cornell.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/37bc2f75bf1bcfe8450a1a41c200364c-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cornell Tech", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Review Networks for Caption Generation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6934", "id": "6934", "author_site": "Zhilin Yang, Ye Yuan, Yuexin Wu, William Cohen, Russ Salakhutdinov", "author": "Zhilin Yang; Ye Yuan; Yuexin Wu; William W. Cohen; Ruslan Salakhutdinov", "abstract": "We propose a novel extension of the encoder-decoder framework, called a review network. The review network is generic and can enhance any existing encoder-decoder model: in this paper, we consider RNN decoders with both CNN and RNN encoders. The review network performs a number of review steps with attention mechanism on the encoder hidden states, and outputs a thought vector after each review step; the thought vectors are used as the input of the attention mechanism in the decoder. We show that conventional encoder-decoders are a special case of our framework. Empirically, we show that our framework improves over state-of-the-art encoder-decoder systems on the tasks of image captioning and source code captioning.", "bibtex": "@inproceedings{NIPS2016_9996535e,\n author = {Yang, Zhilin and Yuan, Ye and Wu, Yuexin and Cohen, William W and Salakhutdinov, Russ R},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Review Networks for Caption Generation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9996535e07258a7bbfd8b132435c5962-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9996535e07258a7bbfd8b132435c5962-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9996535e07258a7bbfd8b132435c5962-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9996535e07258a7bbfd8b132435c5962-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9996535e07258a7bbfd8b132435c5962-Reviews.html", "metareview": "", "pdf_size": 2312281, "gs_citation": 338, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8686235482832878497&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "School of Computer Science; School of Computer Science; School of Computer Science; School of Computer Science; School of Computer Science", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "https://github.com/kimiyoung/review_net", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9996535e07258a7bbfd8b132435c5962-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "School of Computer Science", "aff_unique_dep": "Computer Science", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Reward Augmented Maximum Likelihood for Neural Structured Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6901", "id": "6901", "author_site": "Mohammad Norouzi, Samy Bengio, zhifeng Chen, Navdeep Jaitly, Mike Schuster, Yonghui Wu, Dale Schuurmans", "author": "Mohammad Norouzi; Samy Bengio; zhifeng Chen; Navdeep Jaitly; Mike Schuster; Yonghui Wu; Dale Schuurmans", "abstract": "A key problem in structured output prediction is enabling direct optimization of the task reward function that matters for test evaluation. This paper presents a simple and computationally efficient method that incorporates task reward into maximum likelihood training. We establish a connection between maximum likelihood and regularized expected reward, showing that they are approximately equivalent in the vicinity of the optimal solution. Then we show how maximum likelihood can be generalized by optimizing the conditional probability of auxiliary outputs that are sampled proportional to their exponentiated scaled rewards. We apply this framework to optimize edit distance in the output space, by sampling from edited targets. Experiments on speech recognition and machine translation for neural sequence to sequence models show notable improvements over maximum likelihood baseline by simply sampling from target output augmentations.", "bibtex": "@inproceedings{NIPS2016_2f885d0f,\n author = {Norouzi, Mohammad and Bengio, Samy and Chen, zhifeng and Jaitly, Navdeep and Schuster, Mike and Wu, Yonghui and Schuurmans, Dale},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Reward Augmented Maximum Likelihood for Neural Structured Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2f885d0fbe2e131bfc9d98363e55d1d4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2f885d0fbe2e131bfc9d98363e55d1d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2f885d0fbe2e131bfc9d98363e55d1d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2f885d0fbe2e131bfc9d98363e55d1d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2f885d0fbe2e131bfc9d98363e55d1d4-Reviews.html", "metareview": "", "pdf_size": 271246, "gs_citation": 263, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8898921461473717664&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 17, "aff": "Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain; Google Brain", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2f885d0fbe2e131bfc9d98363e55d1d4-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Brain", "aff_unique_url": "https://brain.google.com", "aff_unique_abbr": "Google Brain", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Riemannian SVRG: Fast Stochastic Optimization on Riemannian Manifolds", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6992", "id": "6992", "author_site": "Hongyi Zhang, Sashank J. Reddi, Suvrit Sra", "author": "Hongyi Zhang; Sashank J. Reddi; Suvrit Sra", "abstract": "We study optimization of finite sums of \emph{geodesically} smooth functions on Riemannian manifolds. Although variance reduction techniques for optimizing finite-sums have witnessed tremendous attention in recent years, existing work is limited to vector space problems. We introduce \emph{Riemannian SVRG} (RSVRG), a new variance reduced Riemannian optimization method. We analyze RSVRG for both geodesically \emph{convex} and \emph{nonconvex} (smooth) functions. Our analysis reveals that RSVRG inherits advantages of the usual SVRG method, but with factors depending on curvature of the manifold that influence its convergence. To our knowledge, RSVRG is the first \emph{provably fast} stochastic Riemannian method. Moreover, our paper presents the first non-asymptotic complexity analysis (novel even for the batch setting) for nonconvex Riemannian optimization. Our results have several implications; for instance, they offer a Riemannian perspective on variance reduced PCA, which promises a short, transparent convergence analysis.", "bibtex": "@inproceedings{NIPS2016_98e6f172,\n author = {Zhang, Hongyi and J. Reddi, Sashank and Sra, Suvrit},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Riemannian SVRG: Fast Stochastic Optimization on Riemannian Manifolds},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/98e6f17209029f4ae6dc9d88ec8eac2c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/98e6f17209029f4ae6dc9d88ec8eac2c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/98e6f17209029f4ae6dc9d88ec8eac2c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/98e6f17209029f4ae6dc9d88ec8eac2c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/98e6f17209029f4ae6dc9d88ec8eac2c-Reviews.html", "metareview": "", "pdf_size": 587940, "gs_citation": 313, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15150201267194279139&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/98e6f17209029f4ae6dc9d88ec8eac2c-Abstract.html" }, { "title": "Robust Spectral Detection of Global Structures in the Data by Learning a Regularization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7240", "id": "7240", "author": "Pan Zhang", "abstract": "Spectral methods are popular in detecting global structures in the given data that can be represented as a matrix. However, when the data matrix is sparse or noisy, classic spectral methods usually fail to work, due to localization of eigenvectors (or singular vectors) induced by the sparsity or noise. In this work, we propose a general method to solve the localization problem by learning a regularization matrix from the localized eigenvectors. Using matrix perturbation analysis, we demonstrate that the learned regularizations suppress the eigenvalues associated with localized eigenvectors and enable us to recover the informative eigenvectors representing the global structure. We show applications of our method in several inference problems: community detection in networks, clustering from pairwise similarities, rank estimation and matrix completion problems. Using extensive experiments, we illustrate that our method solves the localization problem and works down to the theoretical detectability limits in different kinds of synthetic data. This is in contrast with existing spectral algorithms based on the data matrix, non-backtracking matrix, Laplacians and those with rank-one regularizations, which perform poorly in the sparse case with noise.", "bibtex": "@inproceedings{NIPS2016_34ed066d,\n author = {Zhang, Pan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust Spectral Detection of Global Structures in the Data by Learning a Regularization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/34ed066df378efacc9b924ec161e7639-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/34ed066df378efacc9b924ec161e7639-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/34ed066df378efacc9b924ec161e7639-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/34ed066df378efacc9b924ec161e7639-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/34ed066df378efacc9b924ec161e7639-Reviews.html", "metareview": "", "pdf_size": 363929, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7631103271798563665&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Institute of Theoretical Physics, Chinese Academy of Sciences, Beijing 100190, China", "aff_domain": "itp.ac.cn", "email": "itp.ac.cn", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/34ed066df378efacc9b924ec161e7639-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Theoretical Physics", "aff_unique_url": "http://www.itp.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "title": "Robust k-means: a Theoretical Revisit", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6936", "id": "6936", "author": "ALEXANDROS GEORGOGIANNIS", "abstract": "Over the last years, many variations of the quadratic k-means clustering procedure have been proposed, all aiming to robustify the performance of the algorithm in the presence of outliers. In general terms, two main approaches have been developed: one based on penalized regularization methods, and one based on trimming functions. In this work, we present a theoretical analysis of the robustness and consistency properties of a variant of the classical quadratic k-means algorithm, the robust k-means, which borrows ideas from outlier detection in regression. We show that two outliers in a dataset are enough to break down this clustering procedure. However, if we focus on \u201cwell-structured\u201d datasets, then robust k-means can recover the underlying cluster structure in spite of the outliers. Finally, we show that, with slight modifications, the most general non-asymptotic results for consistency of quadratic k-means remain valid for this robust variant.", "bibtex": "@inproceedings{NIPS2016_80a8155e,\n author = {GEORGOGIANNIS, ALEXANDROS},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robust k-means: a Theoretical Revisit},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/80a8155eb153025ea1d513d0b2c4b675-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/80a8155eb153025ea1d513d0b2c4b675-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/80a8155eb153025ea1d513d0b2c4b675-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/80a8155eb153025ea1d513d0b2c4b675-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/80a8155eb153025ea1d513d0b2c4b675-Reviews.html", "metareview": "", "pdf_size": 505730, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1010342535819696547&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "School of Electrical and Computer Engineering, Technical University of Crete, Greece", "aff_domain": "gmail.com", "email": "gmail.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/80a8155eb153025ea1d513d0b2c4b675-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Technical University of Crete", "aff_unique_dep": "School of Electrical and Computer Engineering", "aff_unique_url": "https://www.tuc.gr", "aff_unique_abbr": "TUC", "aff_country_unique_index": "0", "aff_country_unique": "Greece" }, { "title": "Robustness of classifiers: from adversarial to random noise", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6980", "id": "6980", "author_site": "Alhussein Fawzi, Seyed-Mohsen Moosavi-Dezfooli, Pascal Frossard", "author": "Alhussein Fawzi; Seyed-Mohsen Moosavi-Dezfooli; Pascal Frossard", "abstract": "Several recent works have shown that state-of-the-art classifiers are vulnerable to worst-case (i.e., adversarial) perturbations of the datapoints. On the other hand, it has been empirically observed that these same classifiers are relatively robust to random noise. In this paper, we propose to study a semi-random noise regime that generalizes both the random and worst-case noise regimes. We propose the first quantitative analysis of the robustness of nonlinear classifiers in this general noise regime. We establish precise theoretical bounds on the robustness of classifiers in this general regime, which depend on the curvature of the classifier's decision boundary. Our bounds confirm and quantify the empirical observations that classifiers satisfying curvature constraints are robust to random noise. Moreover, we quantify the robustness of classifiers in terms of the subspace dimension in the semi-random noise regime, and show that our bounds remarkably interpolate between the worst-case and random noise regimes. We perform experiments and show that the derived bounds provide very accurate estimates when applied to various state-of-the-art deep neural networks and datasets. This result suggests bounds on the curvature of the classifiers' decision boundaries that we support experimentally, and more generally offers important insights into the geometry of high dimensional classification problems.", "bibtex": "@inproceedings{NIPS2016_7ce3284b,\n author = {Fawzi, Alhussein and Moosavi-Dezfooli, Seyed-Mohsen and Frossard, Pascal},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Robustness of classifiers: from adversarial to random noise},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7ce3284b743aefde80ffd9aec500e085-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7ce3284b743aefde80ffd9aec500e085-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7ce3284b743aefde80ffd9aec500e085-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7ce3284b743aefde80ffd9aec500e085-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7ce3284b743aefde80ffd9aec500e085-Reviews.html", "metareview": "", "pdf_size": 2334344, "gs_citation": 450, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2045380589603960750&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne; \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7ce3284b743aefde80ffd9aec500e085-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "EPFL", "aff_unique_dep": "", "aff_unique_url": "https://www.epfl.ch", "aff_unique_abbr": "EPFL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "R\u00e9nyi Divergence Variational Inference", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7340", "id": "7340", "author_site": "Yingzhen Li, Richard Turner", "author": "Yingzhen Li; Richard E Turner", "abstract": "This paper introduces the variational R\u00e9nyi bound (VR) that extends traditional variational inference to R\u00e9nyi's alpha-divergences. This new family of variational methods unifies a number of existing approaches, and enables a smooth interpolation from the evidence lower-bound to the log (marginal) likelihood that is controlled by the value of alpha that parametrises the divergence. The reparameterization trick, Monte Carlo approximation and stochastic optimisation methods are deployed to obtain a tractable and unified framework for optimisation. We further consider negative alpha values and propose a novel variational inference method as a new special case in the proposed framework. Experiments on Bayesian neural networks and variational auto-encoders demonstrate the wide applicability of the VR bound.", "bibtex": "@inproceedings{NIPS2016_7750ca35,\n author = {Li, Yingzhen and Turner, Richard E},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {R\\'{e}nyi Divergence Variational Inference},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7750ca3559e5b8e1f44210283368fc16-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7750ca3559e5b8e1f44210283368fc16-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7750ca3559e5b8e1f44210283368fc16-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7750ca3559e5b8e1f44210283368fc16-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7750ca3559e5b8e1f44210283368fc16-Reviews.html", "metareview": "", "pdf_size": 782452, "gs_citation": 364, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15834800890982427166&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University of Cambridge; University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk", "email": "cam.ac.uk;cam.ac.uk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7750ca3559e5b8e1f44210283368fc16-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "title": "SDP Relaxation with Randomized Rounding for Energy Disaggregation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7227", "id": "7227", "author_site": "Kiarash Shaloudegi, Andr\u00e1s Gy\u00f6rgy, Csaba Szepesvari, Wilsun Xu", "author": "Kiarash Shaloudegi; Andr\u00e1s Gy\u00f6rgy; Csaba Szepesvari; Wilsun Xu", "abstract": "We develop a scalable, computationally efficient method for the task of energy disaggregation for home appliance monitoring. In this problem the goal is to estimate the energy consumption of each appliance based on the total energy-consumption signal of a household. The current state of the art models the problem as inference in factorial HMMs, and finds an approximate solution to the resulting quadratic integer program via quadratic programming. Here we take a more principled approach, better suited to integer programming problems, and find an approximate optimum by combining convex semidefinite relaxations with randomized rounding, as well as with a scalable ADMM method that exploits the special structure of the resulting semidefinite program. Simulation results demonstrate the superiority of our methods both in synthetic and real-world datasets.", "bibtex": "@inproceedings{NIPS2016_4de75424,\n author = {Shaloudegi, Kiarash and Gy\\\"{o}rgy, Andr\\'{a}s and Szepesvari, Csaba and Xu, Wilsun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SDP Relaxation with Randomized Rounding for Energy Disaggregation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4de754248c196c85ee4fbdcee89179bd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4de754248c196c85ee4fbdcee89179bd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4de754248c196c85ee4fbdcee89179bd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4de754248c196c85ee4fbdcee89179bd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4de754248c196c85ee4fbdcee89179bd-Reviews.html", "metareview": "", "pdf_size": 588904, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13879468747409769080&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "Imperial College London; Imperial College London; University of Alberta; University of Alberta", "aff_domain": "imperial.ac.uk;imperial.ac.uk;ualberta.ca;ualberta.ca", "email": "imperial.ac.uk;imperial.ac.uk;ualberta.ca;ualberta.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4de754248c196c85ee4fbdcee89179bd-Abstract.html", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Imperial College London;University of Alberta", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.ualberta.ca", "aff_unique_abbr": "ICL;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "SEBOOST - Boosting Stochastic Learning Using Subspace Optimization Techniques", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7146", "id": "7146", "author_site": "Elad Richardson, Rom Herskovitz, Boris Ginsburg, Michael Zibulevsky", "author": "Elad Richardson; Rom Herskovitz; Boris Ginsburg; Michael Zibulevsky", "abstract": "We present SEBOOST, a technique for boosting the performance of existing stochastic optimization methods. SEBOOST applies a secondary optimization process in the subspace spanned by the last steps and descent directions. The method was inspired by the SESOP optimization method for large-scale problems, and has been adapted for the stochastic learning framework. It can be applied on top of any existing optimization method with no need to tweak the internal algorithm. We show that the method is able to boost the performance of different algorithms, and make them more robust to changes in their hyper-parameters. As the boosting steps of SEBOOST are applied between large sets of descent steps, the additional subspace optimization hardly increases the overall computational burden. We introduce two hyper-parameters that control the balance between the baseline method and the secondary optimization process. The method was evaluated on several deep learning tasks, demonstrating promising results.", "bibtex": "@inproceedings{NIPS2016_ab88b157,\n author = {Richardson, Elad and Herskovitz, Rom and Ginsburg, Boris and Zibulevsky, Michael},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SEBOOST - Boosting Stochastic Learning Using Subspace Optimization Techniques},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ab88b15733f543179858600245108dd8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ab88b15733f543179858600245108dd8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ab88b15733f543179858600245108dd8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ab88b15733f543179858600245108dd8-Reviews.html", "metareview": "", "pdf_size": 407199, "gs_citation": 11, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5774296431937119306&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Technion, Israel Institute of Technology; Technion, Israel Institute of Technology; Nvidia INC; Technion, Israel Institute of Technology", "aff_domain": "cs.technion.ac.il;cs.technion.ac.il;gmail.com;gmail.com", "email": "cs.technion.ac.il;cs.technion.ac.il;gmail.com;gmail.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ab88b15733f543179858600245108dd8-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Israel Institute of Technology;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www.technion.ac.il/en/;https://www.nvidia.com", "aff_unique_abbr": "Technion;NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Israel;United States" }, { "title": "SPALS: Fast Alternating Least Squares via Implicit Leverage Scores Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7354", "id": "7354", "author_site": "Dehua Cheng, Richard Peng, Yan Liu, Kimis Perros", "author": "Dehua Cheng; Richard Peng; Yan Liu; Ioakeim Perros", "abstract": "Tensor CANDECOMP/PARAFAC (CP) decomposition is a powerful but computationally challenging tool in modern data analytics. In this paper, we show ways of sampling intermediate steps of alternating minimization algorithms for computing low rank tensor CP decompositions, leading to the sparse alternating least squares (SPALS) method. Specifically, we sample the Khatri-Rao product, which arises as an intermediate object during the iterations of alternating least squares. This product captures the interactions between different tensor modes, and forms the main computational bottleneck for solving many tensor related tasks. By exploiting the spectral structures of the matrix Khatri-Rao product, we provide efficient access to its statistical leverage scores. When applied to the tensor CP decomposition, our method leads to the first algorithm that runs in sublinear time per-iteration and approximates the output of deterministic alternating least squares algorithms. Empirical evaluations of this approach show significant speedups over existing randomized and deterministic routines for performing CP decomposition. On a tensor of the size 2.4m by 6.6m by 92k with over 2 billion nonzeros formed by Amazon product reviews, our routine converges in two minutes to the same error as deterministic ALS.", "bibtex": "@inproceedings{NIPS2016_f4f6dce2,\n author = {Cheng, Dehua and Peng, Richard and Liu, Yan and Perros, Ioakeim},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SPALS: Fast Alternating Least Squares via Implicit Leverage Scores Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f4f6dce2f3a0f9dada0c2b5b66452017-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f4f6dce2f3a0f9dada0c2b5b66452017-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f4f6dce2f3a0f9dada0c2b5b66452017-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f4f6dce2f3a0f9dada0c2b5b66452017-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f4f6dce2f3a0f9dada0c2b5b66452017-Reviews.html", "metareview": "", "pdf_size": 556301, "gs_citation": 60, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10610436553269118661&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 6, "aff": "University of Southern California; Georgia Institute of Technology; Georgia Institute of Technology; University of Southern California", "aff_domain": "usc.edu;cc.gatech.edu;gatech.edu;usc.edu", "email": "usc.edu;cc.gatech.edu;gatech.edu;usc.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f4f6dce2f3a0f9dada0c2b5b66452017-Abstract.html", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Southern California;Georgia Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.gatech.edu", "aff_unique_abbr": "USC;Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "SURGE: Surface Regularized Geometry Estimation from a Single Image", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7096", "id": "7096", "author_site": "Peng Wang, Xiaohui Shen, Bryan Russell, Scott Cohen, Brian Price, Alan Yuille", "author": "Peng Wang; Xiaohui Shen; Bryan Russell; Scott Cohen; Brian Price; Alan L. Yuille", "abstract": "This paper introduces an approach to regularize 2.5D surface normal and depth predictions at each pixel given a single input image. The approach infers and reasons about the underlying 3D planar surfaces depicted in the image to snap predicted normals and depths to inferred planar surfaces, all while maintaining fine detail within objects. Our approach comprises two components: (i) a four-stream convolutional neural network (CNN) where depths, surface normals, and likelihoods of planar region and planar boundary are predicted at each pixel, followed by (ii) a dense conditional random field (DCRF) that integrates the four predictions such that the normals and depths are compatible with each other and regularized by the planar region and planar boundary information. The DCRF is formulated such that gradients can be passed to the surface normal and depth CNNs via backpropagation. In addition, we propose new planar-wise metrics to evaluate geometry consistency within planar surfaces, which are more tightly related to dependent 3D editing applications. 
We show that our regularization yields a 30% relative improvement in planar consistency on the NYU v2 dataset.", "bibtex": "@inproceedings{NIPS2016_65ded535,\n author = {Wang, Peng and Shen, Xiaohui and Russell, Bryan and Cohen, Scott and Price, Brian and Yuille, Alan L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SURGE: Surface Regularized Geometry Estimation from a Single Image},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/65ded5353c5ee48d0b7d48c591b8f430-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/65ded5353c5ee48d0b7d48c591b8f430-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/65ded5353c5ee48d0b7d48c591b8f430-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/65ded5353c5ee48d0b7d48c591b8f430-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/65ded5353c5ee48d0b7d48c591b8f430-Reviews.html", "metareview": "", "pdf_size": 5597722, "gs_citation": 103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16395162234734139365&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of California, Los Angeles; Adobe Research; Adobe Research; Adobe Research; Adobe Research; Johns Hopkins University", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/65ded5353c5ee48d0b7d48c591b8f430-Abstract.html", "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "University of California, Los Angeles;Adobe;Johns Hopkins University", "aff_unique_dep": ";Adobe Research;", "aff_unique_url": "https://www.ucla.edu;https://research.adobe.com;https://www.jhu.edu", "aff_unique_abbr": "UCLA;Adobe;JHU", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Safe Exploration in Finite Markov Decision Processes with Gaussian Processes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6946", "id": "6946", "author_site": "Matteo Turchetta, Felix Berkenkamp, Andreas Krause", "author": "Matteo Turchetta; Felix Berkenkamp; Andreas Krause", "abstract": "In classical reinforcement learning, agents accept arbitrary short-term loss for long-term gain when exploring their environment. This is infeasible for safety-critical applications such as robotics, where even a single unsafe action may cause system failure or harm the environment. In this paper, we address the problem of safely exploring finite Markov decision processes (MDPs). We define safety in terms of an a priori unknown safety constraint that depends on states and actions and satisfies certain regularity conditions expressed via a Gaussian process prior. We develop a novel algorithm, SAFEMDP, for this task and prove that it completely explores the safely reachable part of the MDP without violating the safety constraint. To achieve this, it cautiously explores safe states and actions in order to gain statistical confidence about the safety of unvisited state-action pairs from noisy observations collected while navigating the environment. 
Moreover, the algorithm explicitly considers reachability when exploring the MDP, ensuring that it does not get stuck in any state with no safe way out. We demonstrate our method on digital terrain models for the task of exploring an unknown map with a rover.", "bibtex": "@inproceedings{NIPS2016_9a49a25d,\n author = {Turchetta, Matteo and Berkenkamp, Felix and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Safe Exploration in Finite Markov Decision Processes with Gaussian Processes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9a49a25d845a483fae4be7e341368e36-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9a49a25d845a483fae4be7e341368e36-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9a49a25d845a483fae4be7e341368e36-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9a49a25d845a483fae4be7e341368e36-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9a49a25d845a483fae4be7e341368e36-Reviews.html", "metareview": "", "pdf_size": 997878, "gs_citation": 256, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15261888910039311059&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "ETH Zurich; ETH Zurich; ETH Zurich", "aff_domain": "ethz.ch;ethz.ch;ethz.ch", "email": "ethz.ch;ethz.ch;ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9a49a25d845a483fae4be7e341368e36-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Safe Policy Improvement by Minimizing Robust Baseline Regret", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7309", "id": "7309", "author_site": "Mohammad Ghavamzadeh, Marek Petrik, Yinlam Chow", "author": "Mohammad Ghavamzadeh; Marek Petrik; Yinlam Chow", "abstract": "An important problem in sequential decision-making under uncertainty is to use limited data to compute a safe policy, i.e., a policy that is guaranteed to perform at least as well as a given baseline strategy. In this paper, we develop and analyze a new model-based approach to compute a safe policy when we have access to an inaccurate dynamics model of the system with known accuracy guarantees. Our proposed robust method uses this (inaccurate) model to directly minimize the (negative) regret w.r.t. the baseline policy. In contrast to existing approaches, minimizing the regret allows one to improve the baseline policy in states with accurate dynamics and to seamlessly fall back to the baseline policy otherwise. We show that our formulation is NP-hard and propose an approximate algorithm. Our empirical results on several domains show that even this relatively simple approximate algorithm can significantly outperform standard approaches.", "bibtex": "@inproceedings{NIPS2016_9a3d4583,\n author = {Ghavamzadeh, Mohammad and Petrik, Marek and Chow, Yinlam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Safe Policy Improvement by Minimizing Robust Baseline Regret},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9a3d458322d70046f63dfd8b0153ece4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9a3d458322d70046f63dfd8b0153ece4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9a3d458322d70046f63dfd8b0153ece4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9a3d458322d70046f63dfd8b0153ece4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9a3d458322d70046f63dfd8b0153ece4-Reviews.html", "metareview": "", "pdf_size": 456693, "gs_citation": 167, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17333623060919029133&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "University of New Hampshire; Adobe Research + INRIA Lille; Stanford University", "aff_domain": "cs.unh.edu;adobe.com;stanford.edu", "email": "cs.unh.edu;adobe.com;stanford.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9a3d458322d70046f63dfd8b0153ece4-Abstract.html", "aff_unique_index": "0;1+2;3", "aff_unique_norm": "University of New Hampshire;Adobe;INRIA;Stanford University", "aff_unique_dep": ";Adobe Research;;", "aff_unique_url": "https://www.unh.edu;https://research.adobe.com;https://www.inria.fr;https://www.stanford.edu", "aff_unique_abbr": "UNH;Adobe;INRIA;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Lille;Stanford", "aff_country_unique_index": "0;0+1;0", "aff_country_unique": "United States;France" }, { "title": "Safe and Efficient Off-Policy Reinforcement Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7011", "id": "7011", "author_site": "Remi Munos, Tom Stepleton, Anna Harutyunyan, Marc Bellemare", "author": "Remi Munos; Tom Stepleton; Anna Harutyunyan; Marc Bellemare", "abstract": "In this work, we take a fresh look at some old and new algorithms for off-policy, return-based reinforcement learning. Expressing these in a common form, we derive a novel algorithm, Retrace(lambda), with three desired properties: (1) it has low variance; (2) it safely uses samples collected from any behaviour policy, whatever its degree of \"off-policyness\"; and (3) it is efficient as it makes the best use of samples collected from near on-policy behaviour policies. We analyse the contractive nature of the related operator under both off-policy policy evaluation and control settings and derive online sample-based algorithms. We believe this is the first return-based off-policy control algorithm converging a.s. to Q* without the GLIE assumption (Greedy in the Limit with Infinite Exploration). As a corollary, we prove the convergence of Watkins' Q(lambda), which had been an open problem since 1989. We illustrate the benefits of Retrace(lambda) on a standard suite of Atari 2600 games.", "bibtex": "@inproceedings{NIPS2016_c3992e9a,\n author = {Munos, Remi and Stepleton, Tom and Harutyunyan, Anna and Bellemare, Marc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Safe and Efficient Off-Policy Reinforcement Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c3992e9a68c5ae12bd18488bc579b30d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c3992e9a68c5ae12bd18488bc579b30d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c3992e9a68c5ae12bd18488bc579b30d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c3992e9a68c5ae12bd18488bc579b30d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c3992e9a68c5ae12bd18488bc579b30d-Reviews.html", "metareview": "", "pdf_size": 309421, "gs_citation": 772, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10236232618386583112&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": "Google DeepMind; Google DeepMind; Vrije Universiteit Brussel; Google DeepMind", "aff_domain": "google.com;google.com;vub.ac.be;google.com", "email": "google.com;google.com;vub.ac.be;google.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c3992e9a68c5ae12bd18488bc579b30d-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;Vrije Universiteit Brussel", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.vub.be", "aff_unique_abbr": "DeepMind;VUB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Brussels", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;Belgium" }, { "title": "Sample Complexity of Automated Mechanism Design", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7013", "id": "7013", "author_site": "Maria-Florina Balcan, Tuomas Sandholm, Ellen Vitercik", "author": "Maria-Florina F Balcan; Tuomas Sandholm; Ellen Vitercik", "abstract": "The design of revenue-maximizing combinatorial auctions, i.e., multi-item auctions over bundles of goods, is one of the most fundamental problems in computational economics, unsolved even for two bidders and two items for sale. In traditional economic models, it is assumed that the bidders' valuations are drawn from an underlying distribution and that the auction designer has perfect knowledge of this distribution. Despite this strong and oftentimes unrealistic assumption, it is remarkable that the revenue-maximizing combinatorial auction remains unknown. In recent years, automated mechanism design has emerged as one of the most practical and promising approaches to designing high-revenue combinatorial auctions. The most scalable automated mechanism design algorithms take as input samples from the bidders' valuation distribution and then search for a high-revenue auction in a rich auction class. In this work, we provide the first sample complexity analysis for the standard hierarchy of deterministic combinatorial auction classes used in automated mechanism design. In particular, we provide tight sample complexity bounds on the number of samples needed to guarantee that the empirical revenue of the designed mechanism on the samples is close to its expected revenue on the underlying, unknown distribution over bidder valuations, for each of the auction classes in the hierarchy. In addition to helping set automated mechanism design on firm foundations, our results also push the boundaries of learning theory. 
In particular, the hypothesis functions used in our contexts are defined through multi-stage combinatorial optimization procedures, rather than simple decision boundaries, as are common in machine learning.", "bibtex": "@inproceedings{NIPS2016_c667d53a,\n author = {Balcan, Maria-Florina F and Sandholm, Tuomas and Vitercik, Ellen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sample Complexity of Automated Mechanism Design},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c667d53acd899a97a85de0c201ba99be-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c667d53acd899a97a85de0c201ba99be-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c667d53acd899a97a85de0c201ba99be-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c667d53acd899a97a85de0c201ba99be-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c667d53acd899a97a85de0c201ba99be-Reviews.html", "metareview": "", "pdf_size": 402781, "gs_citation": 89, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7006342006398523326&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 7, "aff": "School of Computer Science, Carnegie Mellon University; School of Computer Science, Carnegie Mellon University; School of Computer Science, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c667d53acd899a97a85de0c201ba99be-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sampling for Bayesian Program Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6950", "id": "6950", "author_site": "Kevin Ellis, Armando Solar-Lezama, Josh Tenenbaum", "author": "Kevin Ellis; Armando Solar-Lezama; Josh Tenenbaum", "abstract": "Towards learning programs from data, we introduce the problem of sampling programs from posterior distributions conditioned on that data. Within this setting, we propose an algorithm that uses a symbolic solver to efficiently sample programs. The proposal combines constraint-based program synthesis with sampling via random parity constraints. We give theoretical guarantees on how well the samples approximate the true posterior, and present empirical results showing that the algorithm is efficient in practice, evaluating our approach on 22 program learning problems in the domains of text editing and computer-aided programming.", "bibtex": "@inproceedings{NIPS2016_afd48367,\n author = {Ellis, Kevin and Solar-Lezama, Armando and Tenenbaum, Josh},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sampling for Bayesian Program Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/afd4836712c5e77550897e25711e1d96-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/afd4836712c5e77550897e25711e1d96-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/afd4836712c5e77550897e25711e1d96-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/afd4836712c5e77550897e25711e1d96-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/afd4836712c5e77550897e25711e1d96-Reviews.html", "metareview": "", "pdf_size": 392907, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11735026681675007905&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Brain and Cognitive Sciences, MIT; CSAIL, MIT; Brain and Cognitive Sciences, MIT", "aff_domain": "mit.edu;csail.mit.edu;mit.edu", "email": "mit.edu;csail.mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/afd4836712c5e77550897e25711e1d96-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "Department of Brain and Cognitive Sciences", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Satisfying Real-world Goals with Dataset Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7071", "id": "7071", "author_site": "Gabriel Goh, Andrew Cotter, Maya Gupta, Michael P Friedlander", "author": "Gabriel Goh; Andrew Cotter; Maya Gupta; Michael P Friedlander", "abstract": "The goal of minimizing misclassification error on a training set is often just one of several real-world goals that might be defined on different datasets. For example, one may require a classifier to also make positive predictions at some specified rate for some subpopulation (fairness), or to achieve a specified empirical recall. Other real-world goals include reducing churn with respect to a previously deployed model, or stabilizing online training. In this paper we propose handling multiple goals on multiple datasets by training with dataset constraints, using the ramp penalty to accurately quantify costs, and present an efficient algorithm to approximately optimize the resulting non-convex constrained optimization problem. Experiments on both benchmark and real-world industry datasets demonstrate the effectiveness of our approach.", "bibtex": "@inproceedings{NIPS2016_dc4c44f6,\n author = {Goh, Gabriel and Cotter, Andrew and Gupta, Maya and Friedlander, Michael P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Satisfying Real-world Goals with Dataset Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dc4c44f624d600aa568390f1f1104aa0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dc4c44f624d600aa568390f1f1104aa0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/dc4c44f624d600aa568390f1f1104aa0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dc4c44f624d600aa568390f1f1104aa0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dc4c44f624d600aa568390f1f1104aa0-Reviews.html", "metareview": "", "pdf_size": 405077, "gs_citation": 270, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=273651653114178184&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "Dept. of Mathematics, UC Davis; Google Inc.; Google Inc.; Dept. of Computer Science, University of British Columbia", "aff_domain": "math.ucdavis.edu;google.com;google.com;cs.ubc.ca", "email": "math.ucdavis.edu;google.com;google.com;cs.ubc.ca", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dc4c44f624d600aa568390f1f1104aa0-Abstract.html", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of California, Davis;Google;University of British Columbia", "aff_unique_dep": "Department of Mathematics;Google;Department of Computer Science", "aff_unique_url": "https://www.ucdavis.edu;https://www.google.com;https://www.ubc.ca", "aff_unique_abbr": "UC Davis;Google;UBC", "aff_campus_unique_index": "0;1;1;2", "aff_campus_unique": "Davis;Mountain View;Vancouver", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "United States;Canada" }, { "title": "Scalable Adaptive Stochastic Optimization Using Random Projections", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7247", "id": "7247", "author_site": "Gabriel Krummenacher, Brian McWilliams, Yannic Kilcher, Joachim M Buhmann, Nicolai Meinshausen", "author": "Gabriel Krummenacher; Brian McWilliams; Yannic Kilcher; Joachim M Buhmann; Nicolai Meinshausen", "abstract": "Adaptive stochastic gradient methods such as AdaGrad have gained popularity in particular for training deep neural networks. The most commonly used and studied variant maintains a diagonal matrix approximation to second order information by accumulating past gradients which are used to tune the step size adaptively. In certain situations the full-matrix variant of AdaGrad is expected to attain better performance; however, in high dimensions it is computationally impractical. We present Ada-LR and RadaGrad, two computationally efficient approximations to full-matrix AdaGrad based on randomized dimensionality reduction. They are able to capture dependencies between features and achieve similar performance to full-matrix AdaGrad but at a much smaller computational cost. We show that the regret of Ada-LR is close to the regret of full-matrix AdaGrad, which can have an up to exponentially smaller dependence on the dimension than the diagonal variant. Empirically, we show that Ada-LR and RadaGrad perform similarly to full-matrix AdaGrad. 
On the task of training convolutional neural networks as well as recurrent neural networks, RadaGrad achieves faster convergence than diagonal AdaGrad.", "bibtex": "@inproceedings{NIPS2016_a3d68b46,\n author = {Krummenacher, Gabriel and McWilliams, Brian and Kilcher, Yannic and Buhmann, Joachim M and Meinshausen, Nicolai},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scalable Adaptive Stochastic Optimization Using Random Projections},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a3d68b461bd9d3533ee1dd3ce4628ed4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a3d68b461bd9d3533ee1dd3ce4628ed4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a3d68b461bd9d3533ee1dd3ce4628ed4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a3d68b461bd9d3533ee1dd3ce4628ed4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a3d68b461bd9d3533ee1dd3ce4628ed4-Reviews.html", "metareview": "", "pdf_size": 549686, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11127417727893127886&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": ";;;;", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a3d68b461bd9d3533ee1dd3ce4628ed4-Abstract.html" }, { "title": "Scaled Least Squares Estimator for GLMs in Large-Scale Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6962", "id": "6962", "author_site": "Murat Erdogdu, Lee H Dicker, Mohsen Bayati", "author": "Murat A Erdogdu; Lee H Dicker; Mohsen Bayati", "abstract": "We study the problem of efficiently estimating the coefficients of generalized linear models (GLMs) in the large-scale setting where the number of observations $n$ is much larger than the number of predictors $p$, i.e., $n\\gg p \\gg 1$. We show that in GLMs with random (not necessarily Gaussian) design, the GLM coefficients are approximately proportional to the corresponding ordinary least squares (OLS) coefficients. Using this relation, we design an algorithm that achieves the same accuracy as the maximum likelihood estimator (MLE) through iterations that attain up to a cubic convergence rate, and that are cheaper than any batch optimization algorithm by at least a factor of $\\mathcal{O}(p)$. We provide theoretical guarantees for our algorithm, and analyze the convergence behavior in terms of data dimensions. Finally, we demonstrate the performance of our algorithm through extensive numerical studies on large-scale real and synthetic datasets, and show that it outperforms several other widely used optimization algorithms.", "bibtex": "@inproceedings{NIPS2016_e1696007,\n author = {Erdogdu, Murat A and Dicker, Lee H and Bayati, Mohsen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaled Least Squares Estimator for GLMs in Large-Scale Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e1696007be4eefb81b1a1d39ce48681b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e1696007be4eefb81b1a1d39ce48681b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e1696007be4eefb81b1a1d39ce48681b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e1696007be4eefb81b1a1d39ce48681b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e1696007be4eefb81b1a1d39ce48681b-Reviews.html", "metareview": "", "pdf_size": 2128396, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=810705174020432618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Statistics, Stanford University; Graduate School of Business, Stanford University; Department of Statistics and Biostatistics, Rutgers University + Amazon", "aff_domain": "stanford.edu;stanford.edu;stat.rutgers.edu", "email": "stanford.edu;stanford.edu;stat.rutgers.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e1696007be4eefb81b1a1d39ce48681b-Abstract.html", "aff_unique_index": "0;0;1+2", "aff_unique_norm": "Stanford University;Rutgers University;Amazon", "aff_unique_dep": "Department of Statistics;Department of Statistics and Biostatistics;Amazon.com, Inc.", "aff_unique_url": "https://www.stanford.edu;https://www.rutgers.edu;https://www.amazon.com", "aff_unique_abbr": "Stanford;Rutgers;Amazon", "aff_campus_unique_index": "0;0;", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Scaling Factorial Hidden Markov Models: Stochastic Variational Inference without Messages", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7006", "id": "7006", "author_site": "Yin Cheng Ng, Pawel M Chilinski, Ricardo Silva", "author": "Yin Cheng Ng; Pawel M Chilinski; Ricardo Silva", "abstract": "Factorial Hidden Markov Models (FHMMs) are powerful models for sequential data but they do not scale well with long sequences. We propose a scalable inference and learning algorithm for FHMMs that draws on ideas from the stochastic variational inference, neural network and copula literatures. Unlike existing approaches, the proposed algorithm requires no message passing procedure among latent variables and can be distributed to a network of computers to speed up learning. Our experiments corroborate that the proposed algorithm does not introduce further approximation bias compared to the proven structured mean-field algorithm, and achieves better performance with long sequences and large FHMMs.", "bibtex": "@inproceedings{NIPS2016_7b7a53e2,\n author = {Ng, Yin Cheng and Chilinski, Pawel M and Silva, Ricardo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaling Factorial Hidden Markov Models: Stochastic Variational Inference without Messages},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7b7a53e239400a13bd6be6c91c4f6c4e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7b7a53e239400a13bd6be6c91c4f6c4e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7b7a53e239400a13bd6be6c91c4f6c4e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7b7a53e239400a13bd6be6c91c4f6c4e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7b7a53e239400a13bd6be6c91c4f6c4e-Reviews.html", "metareview": "", "pdf_size": 514265, "gs_citation": 19, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14142172350826997081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Dept. of Statistical Science, University College London; Dept. of Computing Science, University College London; Dept. of Statistical Science, University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "email": "ucl.ac.uk;ucl.ac.uk;ucl.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7b7a53e239400a13bd6be6c91c4f6c4e-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University College London", "aff_unique_dep": "Dept. of Statistical Science", "aff_unique_url": "https://www.ucl.ac.uk", "aff_unique_abbr": "UCL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Scaling Memory-Augmented Neural Networks with Sparse Reads and Writes", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7245", "id": "7245", "author_site": "Jack Rae, Jonathan J Hunt, Ivo Danihelka, Tim Harley, Andrew Senior, Gregory Wayne, Alex Graves, Timothy Lillicrap", "author": "Jack Rae; Jonathan J Hunt; Ivo Danihelka; Timothy Harley; Andrew W. Senior; Gregory Wayne; Alex Graves; Timothy Lillicrap", "abstract": "Neural networks augmented with external memory have the ability to learn algorithmic solutions to complex tasks. These models appear promising for applications such as language modeling and machine translation. However, they scale poorly in both space and time as the amount of memory grows --- limiting their applicability to real-world domains. Here, we present an end-to-end differentiable memory access scheme, which we call Sparse Access Memory (SAM), that retains the representational power of the original approaches whilst training efficiently with very large memories. We show that SAM achieves asymptotic lower bounds in space and time complexity, and find that an implementation runs $1,\\!000\\times$ faster and with $3,\\!000\\times$ less physical memory than non-sparse models. SAM learns with comparable data efficiency to existing models on a range of synthetic tasks and one-shot Omniglot character recognition, and can scale to tasks requiring $100,\\!000$s of time steps and memories. 
We also show how our approach can be adapted for models that maintain temporal associations between memories, as with the recently introduced Differentiable Neural Computer.", "bibtex": "@inproceedings{NIPS2016_3fab5890,\n author = {Rae, Jack and Hunt, Jonathan J and Danihelka, Ivo and Harley, Timothy and Senior, Andrew W and Wayne, Gregory and Graves, Alex and Lillicrap, Timothy},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scaling Memory-Augmented Neural Networks with Sparse Reads and Writes},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3fab5890d8113d0b5a4178201dc842ad-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3fab5890d8113d0b5a4178201dc842ad-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3fab5890d8113d0b5a4178201dc842ad-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3fab5890d8113d0b5a4178201dc842ad-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3fab5890d8113d0b5a4178201dc842ad-Reviews.html", "metareview": "", "pdf_size": 692207, "gs_citation": 196, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5913255713498338191&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 8, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3fab5890d8113d0b5a4178201dc842ad-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Scan Order in Gibbs Sampling: Models in Which it Matters and Bounds on How Much", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7098", "id": "7098", "author_site": "Bryan He, Christopher M De Sa, Ioannis Mitliagkas, Christopher R\u00e9", "author": "Bryan D He; Christopher M De Sa; Ioannis Mitliagkas; Christopher R\u00e9", "abstract": "Gibbs sampling is a Markov Chain Monte Carlo sampling technique that iteratively samples variables from their conditional distributions. There are two common scan orders for the variables: random scan and systematic scan. Due to the benefits of locality in hardware, systematic scan is commonly used, even though most statistical guarantees are only for random scan. While it has been conjectured that the mixing times of random scan and systematic scan do not differ by more than a logarithmic factor, we show by counterexample that this is not the case, and we prove that the mixing times do not differ by more than a polynomial factor under mild conditions. 
To prove these relative bounds, we introduce a method of augmenting the state space to study systematic scan using conductance.", "bibtex": "@inproceedings{NIPS2016_e4da3b7f,\n author = {He, Bryan D and De Sa, Christopher M and Mitliagkas, Ioannis and R\\'{e}, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Scan Order in Gibbs Sampling: Models in Which it Matters and Bounds on How Much},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e4da3b7fbbce2345d7772b0674a318d5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e4da3b7fbbce2345d7772b0674a318d5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e4da3b7fbbce2345d7772b0674a318d5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e4da3b7fbbce2345d7772b0674a318d5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e4da3b7fbbce2345d7772b0674a318d5-Reviews.html", "metareview": "", "pdf_size": 352292, "gs_citation": 52, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9670062595170958175&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 15, "aff": "Stanford University; Stanford University; Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e4da3b7fbbce2345d7772b0674a318d5-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Search Improves Label for Active Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7015", "id": "7015", "author_site": "Alina Beygelzimer, Daniel Hsu, John Langford, Chicheng Zhang", "author": "Alina Beygelzimer; Daniel J. Hsu; John Langford; Chicheng Zhang", "abstract": "We investigate active learning with access to two distinct oracles: LABEL (which is standard) and SEARCH (which is not). The SEARCH oracle models the situation where a human searches a database to seed or counterexample an existing solution. SEARCH is stronger than LABEL while being natural to implement in many situations. We show that an algorithm using both oracles can provide exponentially large problem-dependent improvements over LABEL alone.", "bibtex": "@inproceedings{NIPS2016_4f398cb9,\n author = {Beygelzimer, Alina and Hsu, Daniel J and Langford, John and Zhang, Chicheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Search Improves Label for Active Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4f398cb9d6bc79ae567298335b51ba8a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4f398cb9d6bc79ae567298335b51ba8a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4f398cb9d6bc79ae567298335b51ba8a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4f398cb9d6bc79ae567298335b51ba8a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4f398cb9d6bc79ae567298335b51ba8a-Reviews.html", "metareview": "", "pdf_size": 281959, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9728010300360465073&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Yahoo Research, New York, NY; Columbia University, New York, NY; Microsoft Research, New York, NY; UC San Diego, La Jolla, CA", "aff_domain": "yahoo-inc.com;cs.columbia.edu;microsoft.com;cs.ucsd.edu", "email": "yahoo-inc.com;cs.columbia.edu;microsoft.com;cs.ucsd.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4f398cb9d6bc79ae567298335b51ba8a-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Yahoo Research;Columbia University;Microsoft;University of California, San Diego", "aff_unique_dep": ";;Microsoft Research;", "aff_unique_url": "https://research.yahoo.com;https://www.columbia.edu;https://www.microsoft.com/en-us/research;https://ucsd.edu", "aff_unique_abbr": "Yahoo Res.;Columbia;MSR;UCSD", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "New York;La Jolla", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Select-and-Sample for Spike-and-Slab Sparse Coding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7351", "id": "7351", "author_site": "Abdul-Saboor Sheikh, J\u00f6rg L\u00fccke", "author": "Abdul-Saboor Sheikh; J\u00f6rg L\u00fccke", "abstract": "Probabilistic inference serves as a popular model for neural processing. It is still unclear, however, how approximate probabilistic inference can be accurate and scalable to very high-dimensional continuous latent spaces, especially as typical posteriors for sensory data can be expected to exhibit complex latent dependencies, including multiple modes. Here, we study an approach that can efficiently be scaled while maintaining a richly structured posterior approximation under these conditions. As an example model, we use spike-and-slab sparse coding for V1 processing, and combine latent subspace selection with Gibbs sampling (select-and-sample). Unlike factored variational approaches, the method can maintain large numbers of posterior modes and complex latent dependencies. Unlike pure sampling, the method is scalable to very high-dimensional latent spaces. Among all sparse coding approaches with non-trivial posterior approximations (MAP or ICA-like models), we report the largest-scale results. In applications, we first verify the approach by showing competitiveness in standard denoising benchmarks. Second, we use its scalability to, for the first time, study highly overcomplete settings for V1 encoding using sophisticated posterior representations. 
More generally, our study shows that very accurate probabilistic inference for multi-modal posteriors with complex dependencies is tractable, functionally desirable and consistent with models for neural inference.", "bibtex": "@inproceedings{NIPS2016_de03beff,\n author = {Sheikh, Abdul-Saboor and L\\\"{u}cke, J\\\"{o}rg},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Select-and-Sample for Spike-and-Slab Sparse Coding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/de03beffeed9da5f3639a621bcab5dd4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/de03beffeed9da5f3639a621bcab5dd4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/de03beffeed9da5f3639a621bcab5dd4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/de03beffeed9da5f3639a621bcab5dd4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/de03beffeed9da5f3639a621bcab5dd4-Reviews.html", "metareview": "", "pdf_size": 913121, "gs_citation": 18, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7709281925627748428&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Technical University of Berlin, Germany + Cluster of Excellence Hearing4all, University of Oldenburg, Germany + SAP Innovation Center Network, Berlin; Research Center Neurosensory Science + Cluster of Excellence Hearing4all + Dept. of Medical Physics and Acoustics, University of Oldenburg, Germany", "aff_domain": "gmail.com;uol.de", "email": "gmail.com;uol.de", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/de03beffeed9da5f3639a621bcab5dd4-Abstract.html", "aff_unique_index": "0+1+2;3+4+1", "aff_unique_norm": "Technical University of Berlin;University of Oldenburg;SAP Innovation Center Network;Research Center Neurosensory Science;Cluster of Excellence Hearing4all", "aff_unique_dep": ";Cluster of Excellence Hearing4all;;;", "aff_unique_url": "https://www.tu-berlin.de;https://www.uol.de;https://www.sap.com;;", "aff_unique_abbr": "TUB;;SAP;;", "aff_campus_unique_index": "1;", "aff_campus_unique": ";Berlin", "aff_country_unique_index": "0+0+0;0+0", "aff_country_unique": "Germany;" }, { "title": "Selective inference for group-sparse linear models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7393", "id": "7393", "author_site": "Fan Yang, Rina Barber, Prateek Jain, John Lafferty", "author": "Fan Yang; Rina Foygel Barber; Prateek Jain; John Lafferty", "abstract": "We develop tools for selective inference in the setting of group sparsity, including the construction of confidence intervals and p-values for testing selected groups of variables. Our main technical result gives the precise distribution of the magnitude of the projection of the data onto a given subspace, and enables us to develop inference procedures for a broad class of group-sparse selection methods, including the group lasso, iterative hard thresholding, and forward stepwise regression. 
We give numerical results to illustrate these tools on simulated data and on health record data.", "bibtex": "@inproceedings{NIPS2016_7c82fab8,\n author = {Yang, Fan and Foygel Barber, Rina and Jain, Prateek and Lafferty, John},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Selective inference for group-sparse linear models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7c82fab8c8f89124e2ce92984e04fb40-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7c82fab8c8f89124e2ce92984e04fb40-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7c82fab8c8f89124e2ce92984e04fb40-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7c82fab8c8f89124e2ce92984e04fb40-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7c82fab8c8f89124e2ce92984e04fb40-Reviews.html", "metareview": "", "pdf_size": 888555, "gs_citation": 77, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11825085994225068761&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Statistics, University of Chicago; Department of Statistics, University of Chicago; Microsoft Research India; Depts. of Statistics and Computer Science, University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu;microsoft.com;galton.uchicago.edu", "email": "uchicago.edu;uchicago.edu;microsoft.com;galton.uchicago.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7c82fab8c8f89124e2ce92984e04fb40-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Chicago;Microsoft", "aff_unique_dep": "Department of Statistics;Microsoft Research India", "aff_unique_url": "https://www.uchicago.edu;https://www.microsoft.com/en-us/research/group/microsoft-research-india", "aff_unique_abbr": "UChicago;MSR India", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;India" }, { "title": "Semiparametric Differential Graph Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7153", "id": "7153", "author_site": "Pan Xu, Quanquan Gu", "author": "Pan Xu; Quanquan Gu", "abstract": "In many cases of network analysis, it is more attractive to study how a network varies under different conditions than to study a single static network. We propose a novel graphical model, namely the Latent Differential Graph Model, where the networks under two different conditions are represented by two semiparametric elliptical distributions, respectively, and the variation of these two networks (i.e., differential graph) is characterized by the difference between their latent precision matrices. We propose an estimator for the differential graph based on quasi-likelihood maximization with nonconvex regularization. We show that our estimator attains a faster statistical rate in parameter estimation than the state-of-the-art methods, and enjoys the oracle property under mild conditions. Thorough experiments on both synthetic and real world data support our theory.", "bibtex": "@inproceedings{NIPS2016_f76a89f0,\n author = {Xu, Pan and Gu, Quanquan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Semiparametric Differential Graph Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f76a89f0cb91bc419542ce9fa43902dc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f76a89f0cb91bc419542ce9fa43902dc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f76a89f0cb91bc419542ce9fa43902dc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f76a89f0cb91bc419542ce9fa43902dc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f76a89f0cb91bc419542ce9fa43902dc-Reviews.html", "metareview": "", "pdf_size": 588460, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17213072487127505521&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "University of Virginia; University of Virginia", "aff_domain": "virginia.edu;virginia.edu", "email": "virginia.edu;virginia.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f76a89f0cb91bc419542ce9fa43902dc-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Virginia", "aff_unique_dep": "", "aff_unique_url": "https://www.virginia.edu", "aff_unique_abbr": "UVA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Sequential Neural Models with Stochastic Layers", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7212", "id": "7212", "author_site": "Marco Fraccaro, S\u00f8ren Kaae S\u00f8nderby, Ulrich Paquet, Ole Winther", "author": "Marco Fraccaro; S\u00f8ren Kaae S\u00f8nderby; Ulrich Paquet; Ole Winther", "abstract": "How can we efficiently propagate uncertainty in a latent state representation with recurrent neural networks? This paper introduces stochastic recurrent neural networks which glue a deterministic recurrent neural network and a state space model together to form a stochastic and sequential neural generative model. The clear separation of deterministic and stochastic layers allows a structured variational inference network to track the factorization of the model\u2019s posterior distribution. By retaining both the nonlinear recursive structure of a recurrent neural network and averaging over the uncertainty in a latent path, like a state space model, we improve the state-of-the-art results on the Blizzard and TIMIT speech modeling data sets by a large margin, while achieving performance comparable to competing methods on polyphonic music modeling.", "bibtex": "@inproceedings{NIPS2016_208e43f0,\n author = {Fraccaro, Marco and S\\o nderby, S\\o ren Kaae and Paquet, Ulrich and Winther, Ole},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sequential Neural Models with Stochastic Layers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/208e43f0e45c4c78cafadb83d2888cb6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/208e43f0e45c4c78cafadb83d2888cb6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/208e43f0e45c4c78cafadb83d2888cb6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/208e43f0e45c4c78cafadb83d2888cb6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/208e43f0e45c4c78cafadb83d2888cb6-Reviews.html", "metareview": "", "pdf_size": 498900, "gs_citation": 505, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8634151674822957659&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 14, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/208e43f0e45c4c78cafadb83d2888cb6-Abstract.html" }, { "title": "Short-Dot: Computing Large Linear Transforms Distributedly Using Coded Short Dot Products", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7185", "id": "7185", "author_site": "Sanghamitra Dutta, Viveck Cadambe, Pulkit Grover", "author": "Sanghamitra Dutta; Viveck Cadambe; Pulkit Grover", "abstract": "Faced with the saturation of Moore's law and the increasing size and dimension of data, system designers have increasingly resorted to parallel and distributed computing to reduce the computation time of machine-learning algorithms. However, distributed computing is often bottlenecked by a small fraction of slow processors called \"stragglers\" that reduce the speed of computation because the fusion node has to wait for all processors to complete their processing. To combat the effect of stragglers, recent literature proposes introducing redundancy in computations across processors, e.g., using repetition-based strategies or erasure codes. The fusion node can exploit this redundancy by completing the computation using outputs from only a subset of the processors, ignoring the stragglers. In this paper, we propose a novel technique - that we call \"Short-Dot\" - to introduce redundant computations in a coding-theory-inspired fashion, for computing linear transforms of long vectors. Instead of computing long dot products as required in the original linear transform, we construct a larger number of redundant and short dot products that can be computed more efficiently at individual processors. Further, only a subset of these short dot products is required at the fusion node to finish the computation successfully. We demonstrate through probabilistic analysis as well as experiments on computing clusters that Short-Dot offers significant speed-up compared to existing techniques. We also derive trade-offs between the length of the dot-products and the resilience to stragglers (number of processors required to finish), for any such strategy and compare it to that achieved by our strategy.", "bibtex": "@inproceedings{NIPS2016_aace49c7,\n author = {Dutta, Sanghamitra and Cadambe, Viveck and Grover, Pulkit},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Short-Dot: Computing Large Linear Transforms Distributedly Using Coded Short Dot Products},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/aace49c7d80767cffec0e513ae886df0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/aace49c7d80767cffec0e513ae886df0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/aace49c7d80767cffec0e513ae886df0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/aace49c7d80767cffec0e513ae886df0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/aace49c7d80767cffec0e513ae886df0-Reviews.html", "metareview": "", "pdf_size": 986460, "gs_citation": 446, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9305637963414930867&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Carnegie Mellon University; Pennsylvania State University; Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;engr.psu.edu;andrew.cmu.edu", "email": "andrew.cmu.edu;engr.psu.edu;andrew.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/aace49c7d80767cffec0e513ae886df0-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.psu.edu", "aff_unique_abbr": "CMU;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Showing versus doing: Teaching by demonstration", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7429", "id": "7429", "author_site": "Mark Ho, Michael Littman, James MacGlashan, Fiery Cushman, Joe Austerweil, Joseph L Austerweil", "author": "Mark K Ho; Michael Littman; James MacGlashan; Fiery Cushman; Joseph L Austerweil", "abstract": "People often learn from others' demonstrations, and classic inverse reinforcement learning (IRL) algorithms have brought us closer to realizing this capacity in machines. In contrast, teaching by demonstration has been less well studied computationally. Here, we develop a novel Bayesian model for teaching by demonstration. Stark differences arise when demonstrators are intentionally teaching a task versus simply performing a task. In two experiments, we show that human participants systematically modify their teaching behavior consistent with the predictions of our model. Further, we show that even standard IRL algorithms benefit when learning from behaviors that are intentionally pedagogical. We conclude by discussing IRL algorithms that can take advantage of intentional pedagogy.", "bibtex": "@inproceedings{NIPS2016_b5488aef,\n author = {Ho, Mark K and Littman, Michael and MacGlashan, James and Cushman, Fiery and Austerweil, Joseph L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Showing versus doing: Teaching by demonstration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b5488aeff42889188d03c9895255cecc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b5488aeff42889188d03c9895255cecc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b5488aeff42889188d03c9895255cecc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b5488aeff42889188d03c9895255cecc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b5488aeff42889188d03c9895255cecc-Reviews.html", "metareview": "", "pdf_size": 2790364, "gs_citation": 143, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15884978826719410907&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Department of Cognitive, Linguistic, and Psychological Sciences, Brown University; Department of Computer Science, Brown University; Department of Computer Science, Brown University; Department of Psychology, Harvard University; Department of Psychology, University of Wisconsin-Madison", "aff_domain": "brown.edu;cs.brown.edu;brown.edu;fas.harvard.edu;wisc.edu", "email": "brown.edu;cs.brown.edu;brown.edu;fas.harvard.edu;wisc.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b5488aeff42889188d03c9895255cecc-Abstract.html", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Brown University;Harvard University;University of Wisconsin-Madison", "aff_unique_dep": "Department of Cognitive, Linguistic, and Psychological Sciences;Department of Psychology;Department of Psychology", "aff_unique_url": "https://www.brown.edu;https://www.harvard.edu;https://www.wisc.edu", "aff_unique_abbr": "Brown;Harvard;UW-Madison", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Madison", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Simple and Efficient Weighted Minwise Hashing", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7143", "id": "7143", "author": "Anshumali Shrivastava", "abstract": "Weighted minwise hashing (WMH) is one of the fundamental subroutines required by many celebrated approximation algorithms and is commonly adopted in industrial practice for large-scale search and learning. The resource bottleneck with WMH is the computation of multiple (typically a few hundred to a few thousand) independent hashes of the data. We propose a simple rejection-type sampling scheme based on a carefully designed red-green map, where we show that the number of rejected samples has exactly the same distribution as weighted minwise sampling. The running time of our method, for many practical datasets, is an order of magnitude smaller than that of existing methods. Experimental evaluations, on real datasets, show that for computing 500 WMH, our proposal can be 60000x faster than Ioffe's method without losing any accuracy. Our method is also around 100x faster than approximate heuristics capitalizing on the efficient ``densified\" one permutation hashing schemes~\\cite{Proc:OneHashLSH", "bibtex": "@inproceedings{NIPS2016_c2626d85,\n author = {Shrivastava, Anshumali},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Simple and Efficient Weighted Minwise Hashing},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c2626d850c80ea07e7511bbae4c76f4b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c2626d850c80ea07e7511bbae4c76f4b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c2626d850c80ea07e7511bbae4c76f4b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c2626d850c80ea07e7511bbae4c76f4b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c2626d850c80ea07e7511bbae4c76f4b-Reviews.html", "metareview": "", "pdf_size": 508938, "gs_citation": 58, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2964868750851002548&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science, Rice University, Houston, TX, 77005", "aff_domain": "rice.edu", "email": "rice.edu", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c2626d850c80ea07e7511bbae4c76f4b-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Rice University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_campus_unique_index": "0", "aff_campus_unique": "Houston", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Single Pass PCA of Matrix Products", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7312", "id": "7312", "author_site": "Shanshan Wu, Srinadh Bhojanapalli, Sujay Sanghavi, Alex Dimakis", "author": "Shanshan Wu; Srinadh Bhojanapalli; Sujay Sanghavi; Alexandros G Dimakis", "abstract": "In this paper we present a new algorithm for computing a low rank approximation of the product $A^TB$ by taking only a single pass of the two matrices $A$ and $B$. The straightforward way to do this is to (a) first sketch $A$ and $B$ individually, and then (b) find the top components using PCA on the sketch. Our algorithm in contrast retains additional summary information about $A,B$ (e.g. row and column norms etc.) and uses this additional information to obtain an improved approximation from the sketches. Our main analytical result establishes a comparable spectral norm guarantee to existing two-pass methods; in addition we also provide results from an Apache Spark implementation that shows better computational and statistical performance on real-world and synthetic evaluation datasets.", "bibtex": "@inproceedings{NIPS2016_0e55666a,\n author = {Wu, Shanshan and Bhojanapalli, Srinadh and Sanghavi, Sujay and Dimakis, Alexandros G},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Single Pass PCA of Matrix Products},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0e55666a4ad822e0e34299df3591d979-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0e55666a4ad822e0e34299df3591d979-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0e55666a4ad822e0e34299df3591d979-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0e55666a4ad822e0e34299df3591d979-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0e55666a4ad822e0e34299df3591d979-Reviews.html", "metareview": "", "pdf_size": 535305, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16704408921917762422&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "The University of Texas at Austin; Toyota Technological Institute at Chicago; The University of Texas at Austin; The University of Texas at Austin", "aff_domain": "utexas.edu;ttic.edu;mail.utexas.edu;austin.utexas.edu", "email": "utexas.edu;ttic.edu;mail.utexas.edu;austin.utexas.edu", "github": "https://github.com/wushanshan/MatrixProductPCA", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0e55666a4ad822e0e34299df3591d979-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Texas at Austin;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.tti-chicago.org", "aff_unique_abbr": "UT Austin;TTI Chicago", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Austin;Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Single-Image Depth Perception in the Wild", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7401", "id": "7401", "author_site": "Weifeng Chen, Zhao Fu, Dawei Yang, Jia Deng", "author": "Weifeng Chen; Zhao Fu; Dawei Yang; Jia Deng", "abstract": "This paper studies single-image depth perception in the wild, i.e., recovering depth from a single image taken in unconstrained settings. We introduce a new dataset \u201cDepth in the Wild\u201d consisting of images in the wild annotated with relative depth between pairs of random points. We also propose a new algorithm that learns to estimate metric depth using annotations of relative depth. Compared to the state of the art, our algorithm is simpler and performs better. Experiments show that our algorithm, combined with existing RGB-D data and our new relative depth annotations, significantly improves single-image depth perception in the wild.", "bibtex": "@inproceedings{NIPS2016_0deb1c54,\n author = {Chen, Weifeng and Fu, Zhao and Yang, Dawei and Deng, Jia},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Single-Image Depth Perception in the Wild},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0deb1c54814305ca9ad266f53bc82511-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0deb1c54814305ca9ad266f53bc82511-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0deb1c54814305ca9ad266f53bc82511-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0deb1c54814305ca9ad266f53bc82511-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0deb1c54814305ca9ad266f53bc82511-Reviews.html", "metareview": "", "pdf_size": 6123443, "gs_citation": 667, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8375727379722127143&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Michigan, Ann Arbor; University of Michigan, Ann Arbor; University of Michigan, Ann Arbor; University of Michigan, Ann Arbor", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu", "email": "umich.edu;umich.edu;umich.edu;umich.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0deb1c54814305ca9ad266f53bc82511-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Solving Marginal MAP Problems with NP Oracles and Parity Constraints", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7359", "id": "7359", "author_site": "Yexiang Xue, zhiyuan li, Stefano Ermon, Carla Gomes, Bart Selman", "author": "Yexiang Xue; Zhiyuan Li; Stefano Ermon; Carla P. Gomes; Bart Selman", "abstract": "Arising from many applications at the intersection of decision-making and machine learning, Marginal Maximum A Posteriori (Marginal MAP) problems unify the two main classes of inference, namely maximization (optimization) and marginal inference (counting), and are believed to have higher complexity than both of them. We propose XOR", "bibtex": "@inproceedings{NIPS2016_a532400e,\n author = {Xue, Yexiang and Li, Zhiyuan and Ermon, Stefano and Gomes, Carla P and Selman, Bart},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Solving Marginal MAP Problems with NP Oracles and Parity Constraints},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a532400ed62e772b9dc0b86f46e583ff-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a532400ed62e772b9dc0b86f46e583ff-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a532400ed62e772b9dc0b86f46e583ff-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a532400ed62e772b9dc0b86f46e583ff-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a532400ed62e772b9dc0b86f46e583ff-Reviews.html", "metareview": "", "pdf_size": 1405084, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3614607603808356976&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, Cornell University; Institute of Interdisciplinary Information Sciences, Tsinghua University + Department of Computer Science, Cornell University; Department of Computer Science, Stanford University; Department of Computer Science, Cornell University; Department of Computer Science, Cornell University", "aff_domain": "cs.cornell.edu;mails.tsinghua.edu.cn;cs.stanford.edu;cs.cornell.edu;cs.cornell.edu", "email": "cs.cornell.edu;mails.tsinghua.edu.cn;cs.stanford.edu;cs.cornell.edu;cs.cornell.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a532400ed62e772b9dc0b86f46e583ff-Abstract.html", "aff_unique_index": "0;1+0;2;0;0", "aff_unique_norm": "Cornell University;Tsinghua University;Stanford University", "aff_unique_dep": "Department of Computer Science;Institute of Interdisciplinary Information Sciences;Department of Computer Science", "aff_unique_url": "https://www.cornell.edu;https://www.tsinghua.edu.cn;https://www.stanford.edu", "aff_unique_abbr": "Cornell;THU;Stanford", "aff_campus_unique_index": ";1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1+0;0;0;0", "aff_country_unique": "United States;China" }, { "title": "Solving Random Systems of Quadratic Equations via Truncated Generalized Gradient Flow", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7273", "id": "7273", "author_site": "Gang Wang, Georgios Giannakis", "author": "Gang Wang; Georgios Giannakis", "abstract": "This paper puts forth a novel algorithm, termed \\emph{truncated generalized gradient flow} (TGGF), to solve for $\\bm{x}\\in\\mathbb{R}^n/\\mathbb{C}^n$ a system of $m$ quadratic equations $y_i=|\\langle\\bm{a}_i,\\bm{x}\\rangle|^2$, $i=1,2,\\ldots,m$, which even for $\\left\\{\\bm{a}_i\\in\\mathbb{R}^n/\\mathbb{C}^n\\right\\}_{i=1}^m$ random is known to be \\emph{NP-hard} in general. We prove that as soon as the number of equations $m$ is on the order of the number of unknowns $n$, TGGF recovers the solution exactly (up to a global unimodular constant) with high probability and complexity growing linearly with the time required to read the data $\\left\\{\\left(\\bm{a}_i;\\,y_i\\right)\\right\\}_{i=1}^m$. Specifically, TGGF proceeds in two stages: s1) A novel \\emph{orthogonality-promoting} initialization that is obtained with simple power iterations; and, s2) a refinement of the initial estimate by successive updates of scalable \\emph{truncated generalized gradient iterations}. 
The former is in sharp contrast to the existing spectral initializations, while the latter handles the rather challenging nonconvex and nonsmooth \\emph{amplitude-based} cost function. Numerical tests demonstrate that: i) The novel orthogonality-promoting initialization method returns more accurate and robust estimates relative to its spectral counterparts; and ii) even with the same initialization, our refinement/truncation outperforms Wirtinger-based alternatives, all corroborating the superior performance of TGGF over state-of-the-art algorithms.", "bibtex": "@inproceedings{NIPS2016_5b8add2a,\n author = {Wang, Gang and Giannakis, Georgios},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Solving Random Systems of Quadratic Equations via Truncated Generalized Gradient Flow},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5b8add2a5d98b1a652ea7fd72d942dac-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5b8add2a5d98b1a652ea7fd72d942dac-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5b8add2a5d98b1a652ea7fd72d942dac-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5b8add2a5d98b1a652ea7fd72d942dac-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5b8add2a5d98b1a652ea7fd72d942dac-Reviews.html", "metareview": "", "pdf_size": 639528, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15203520287862470725&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "ECE Dept. and Digital Tech. Center, Univ. of Minnesota, Mpls, MN 55455, USA+School of Automation, Beijing Institute of Technology, Beijing 100081, China; School of Automation, Beijing Institute of Technology, Beijing 100081, China", "aff_domain": "umn.edu;umn.edu", "email": "umn.edu;umn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5b8add2a5d98b1a652ea7fd72d942dac-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "University of Minnesota;Beijing Institute of Technology", "aff_unique_dep": "Electrical and Computer Engineering;School of Automation", "aff_unique_url": "https://www.umn.edu;http://www.bit.edu.cn", "aff_unique_abbr": "UMN;BIT", "aff_campus_unique_index": "0+1;1", "aff_campus_unique": "Minneapolis;Beijing", "aff_country_unique_index": "0+1;1", "aff_country_unique": "United States;China" }, { "title": "Sorting out typicality with the inverse moment matrix SOS polynomial", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6928", "id": "6928", "author_site": "Edouard Pauwels, Jean Lasserre", "author": "Edouard Pauwels; Jean B Lasserre", "abstract": "We study a surprising phenomenon related to the representation of a cloud of data points using polynomials. We start with the previously unnoticed empirical observation that, given a collection (a cloud) of data points, the sublevel sets of a certain distinguished polynomial capture the shape of the cloud very accurately. This distinguished polynomial is a sum-of-squares (SOS) derived in a simple manner from the inverse of the empirical moment matrix. In fact, this SOS polynomial is directly related to orthogonal polynomials and the Christoffel function. 
This allows us to generalize and interpret extremality properties of orthogonal polynomials and to provide a mathematical rationale for the observed phenomenon. Among diverse potential applications, we illustrate the relevance of our results on a network intrusion detection task for which we obtain performance similar to existing dedicated methods reported in the literature.", "bibtex": "@inproceedings{NIPS2016_2b24d495,\n author = {Pauwels, Edouard and Lasserre, Jean B},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sorting out typicality with the inverse moment matrix SOS polynomial},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2b24d495052a8ce66358eb576b8912c8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2b24d495052a8ce66358eb576b8912c8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2b24d495052a8ce66358eb576b8912c8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2b24d495052a8ce66358eb576b8912c8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2b24d495052a8ce66358eb576b8912c8-Reviews.html", "metareview": "", "pdf_size": 588186, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1997253944194312518&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "LAAS-CNRS & IMT, Universit\u00e9 de Toulouse; IRIT & IMT, Universit\u00e9 Toulouse 3 Paul Sabatier", "aff_domain": "laas.fr;irit.fr", "email": "laas.fr;irit.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2b24d495052a8ce66358eb576b8912c8-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 de Toulouse;Universit\u00e9 Toulouse 3 Paul Sabatier", "aff_unique_dep": "LAAS-CNRS & IMT;IRIT & IMT", "aff_unique_url": "https://www.univ-toulouse.fr;https://www.univ-tlse3.fr", "aff_unique_abbr": "UT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "SoundNet: Learning Sound Representations from Unlabeled Video", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8512", "id": "8512", "author_site": "Yusuf Aytar, Carl Vondrick, Antonio Torralba", "author": "Yusuf Aytar; Carl Vondrick; Antonio Torralba", "abstract": "We learn rich natural sound representations by capitalizing on large amounts of unlabeled sound data collected in the wild. We leverage the natural synchronization between vision and sound to learn an acoustic representation using two million unlabeled videos. Unlabeled video has the advantage that it can be economically acquired at massive scales, yet contains useful signals about natural sound. We propose a student-teacher training procedure which transfers discriminative visual knowledge from well-established visual recognition models into the sound modality using unlabeled video as a bridge. Our sound representation yields significant performance improvements over the state-of-the-art results on standard benchmarks for acoustic scene/object classification. 
Visualizations suggest some high-level semantics automatically emerge in the sound network, even though it is trained without ground truth labels.", "bibtex": "@inproceedings{NIPS2016_7dcd340d,\n author = {Aytar, Yusuf and Vondrick, Carl and Torralba, Antonio},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {SoundNet: Learning Sound Representations from Unlabeled Video},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7dcd340d84f762eba80aa538b0c527f7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7dcd340d84f762eba80aa538b0c527f7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7dcd340d84f762eba80aa538b0c527f7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7dcd340d84f762eba80aa538b0c527f7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7dcd340d84f762eba80aa538b0c527f7-Reviews.html", "metareview": "", "pdf_size": 5847470, "gs_citation": 1340, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10397330204607504126&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "MIT; MIT; MIT", "aff_domain": "csail.mit.edu;mit.edu;mit.edu", "email": "csail.mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7dcd340d84f762eba80aa538b0c527f7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Sparse Support Recovery with Non-smooth Loss Functions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6947", "id": "6947", "author_site": "K\u00e9vin Degraux, Gabriel Peyr\u00e9, Jalal Fadili, Laurent Jacques", "author": "K\u00e9vin Degraux; Gabriel Peyr\u00e9; Jalal Fadili; Laurent Jacques", "abstract": "In this paper, we study the support recovery guarantees of underdetermined sparse regression using the $\\ell_1$-norm as a regularizer and a non-smooth loss function for data fidelity. More precisely, we focus in detail on the cases of $\\ell_1$ and $\\ell_\\infty$ losses, and contrast them with the usual $\\ell_2$ loss. While these losses are routinely used to account for either sparse ($\\ell_1$ loss) or uniform ($\\ell_\\infty$ loss) noise models, a theoretical analysis of their performance is still lacking. In this article, we extend the existing theory from the smooth $\\ell_2$ case to these non-smooth cases. We derive a sharp condition which ensures that the support of the vector to recover is stable to small additive noise in the observations, as long as the loss constraint size is tuned proportionally to the noise level. A distinctive feature of our theory is that it also explains what happens when the support is unstable. While the support is not stable anymore, we identify an \"extended support\" and show that this extended support is stable to small additive noise. To exemplify the usefulness of our theory, we give a detailed numerical analysis of the support stability/instability of compressed sensing recovery with these different losses. 
This highlights different parameter regimes, ranging from total support stability to progressively increasing support instability.", "bibtex": "@inproceedings{NIPS2016_5a1e3a5a,\n author = {Degraux, K\\'{e}vin and Peyr\\'{e}, Gabriel and Fadili, Jalal and Jacques, Laurent},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sparse Support Recovery with Non-smooth Loss Functions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5a1e3a5aede16d438c38862cac1a78db-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5a1e3a5aede16d438c38862cac1a78db-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5a1e3a5aede16d438c38862cac1a78db-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5a1e3a5aede16d438c38862cac1a78db-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5a1e3a5aede16d438c38862cac1a78db-Reviews.html", "metareview": "", "pdf_size": 603814, "gs_citation": 6, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8251726204067618800&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "ISPGroup/ICTEAM, FNRS, Universit\u00e9 catholique de Louvain, Louvain-la-Neuve, Belgium 1348; CNRS, DMA, \u00c9cole Normale Sup\u00e9rieure, Paris, France 75775; Normandie Univ, ENSICAEN, CNRS, GREYC, Caen, France 14050; ISPGroup/ICTEAM, FNRS, Universit\u00e9 catholique de Louvain, Louvain-la-Neuve, Belgium 1348", "aff_domain": "uclouvain.be;ens.fr;ensicaen.fr;uclouvain.be", "email": "uclouvain.be;ens.fr;ensicaen.fr;uclouvain.be", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5a1e3a5aede16d438c38862cac1a78db-Abstract.html", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Universit\u00e9 catholique de Louvain;\u00c9cole Normale Sup\u00e9rieure;Normandie University", "aff_unique_dep": "ISPGroup/ICTEAM;DMA;ENSICAEN", "aff_unique_url": "https://www.uclouvain.be;https://www.ens.fr;https://www.univ-normandie.fr", "aff_unique_abbr": ";ENS;Univ Normandie", "aff_campus_unique_index": "0;1;2;0", "aff_campus_unique": "Louvain-la-Neuve;Paris;Caen", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Belgium;France" }, { "title": "Spatio-Temporal Hilbert Maps for Continuous Occupancy Representation in Dynamic Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7186", "id": "7186", "author_site": "Ransalu Senanayake, Lionel Ott, Simon O'Callaghan, Fabio Ramos", "author": "Ransalu Senanayake; Lionel Ott; Simon O'Callaghan; Fabio T Ramos", "abstract": "We consider the problem of building continuous occupancy representations in dynamic environments for robotics applications. The problem has hardly been discussed previously due to the complexity of patterns in urban environments, which have both spatial and temporal dependencies. We address the problem as learning a kernel classifier on an efficient feature space. The key novelty of our approach is the incorporation of variations in the time domain into the spatial domain. We propose a method to propagate motion uncertainty into the kernel using a hierarchical model. 
The main benefit of this approach is that it can directly predict the occupancy state of the map in the future from past observations, making it a valuable tool for robot trajectory planning under uncertainty. Our approach preserves the main computational benefits of static Hilbert maps \u2014 using stochastic gradient descent for fast optimization of model parameters and incremental updates as new data are captured. Experiments conducted at road intersections of an urban environment demonstrated that spatio-temporal Hilbert maps can accurately model changes in the map while outperforming other techniques in several respects.", "bibtex": "@inproceedings{NIPS2016_6f2688a5,\n author = {Senanayake, Ransalu and Ott, Lionel and O\\textquotesingle Callaghan, Simon and Ramos, Fabio T},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Spatio-Temporal Hilbert Maps for Continuous Occupancy Representation in Dynamic Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6f2688a5fce7d48c8d19762b88c32c3b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6f2688a5fce7d48c8d19762b88c32c3b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6f2688a5fce7d48c8d19762b88c32c3b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6f2688a5fce7d48c8d19762b88c32c3b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6f2688a5fce7d48c8d19762b88c32c3b-Reviews.html", "metareview": "", "pdf_size": 2328753, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18343768272749581079&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Sydney; University of Sydney; Data61/CSIRO, Australia; University of Sydney", "aff_domain": "uni.sydney.edu.au;sydney.edu.au;data61.csiro.au;sydney.edu.au", "email": "uni.sydney.edu.au;sydney.edu.au;data61.csiro.au;sydney.edu.au", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6f2688a5fce7d48c8d19762b88c32c3b-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Sydney;Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": ";Data61", "aff_unique_url": "https://www.sydney.edu.au;https://www.csiro.au", "aff_unique_abbr": "USYD;CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "title": "Spatiotemporal Residual Networks for Video Action Recognition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7276", "id": "7276", "author_site": "Christoph Feichtenhofer, Axel Pinz, Richard Wildes", "author": "Christoph Feichtenhofer; Axel Pinz; Richard Wildes", "abstract": "Two-stream Convolutional Networks (ConvNets) have shown strong performance for human action recognition in videos. Recently, Residual Networks (ResNets) have arisen as a new technique to train extremely deep architectures. In this paper, we introduce spatiotemporal ResNets as a combination of these two approaches. Our novel architecture generalizes ResNets for the spatiotemporal domain by introducing residual connections in two ways. 
First, we inject residual connections between the appearance and motion pathways of a two-stream architecture to allow spatiotemporal interaction between the two streams. Second, we transform pretrained image ConvNets into spatiotemporal networks by equipping these with learnable convolutional filters that are initialized as temporal residual connections and operate on adjacent feature maps in time. This approach slowly increases the spatiotemporal receptive field as the depth of the model increases and naturally integrates image ConvNet design principles. The whole model is trained end-to-end to allow hierarchical learning of complex spatiotemporal features. We evaluate our novel spatiotemporal ResNet using two widely used action recognition benchmarks where it exceeds the previous state-of-the-art.", "bibtex": "@inproceedings{NIPS2016_3e7e0224,\n author = {Feichtenhofer, Christoph and Pinz, Axel and Wildes, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Spatiotemporal Residual Networks for Video Action Recognition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3e7e0224018ab3cf51abb96464d518cd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3e7e0224018ab3cf51abb96464d518cd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3e7e0224018ab3cf51abb96464d518cd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3e7e0224018ab3cf51abb96464d518cd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3e7e0224018ab3cf51abb96464d518cd-Reviews.html", "metareview": "", "pdf_size": 710646, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Graz University of Technology; Graz University of Technology; York University, Toronto", "aff_domain": "tugraz.at;tugraz.at;cse.yorku.ca", "email": "tugraz.at;tugraz.at;cse.yorku.ca", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3e7e0224018ab3cf51abb96464d518cd-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Graz University of Technology;York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tugraz.at;https://yorku.ca", "aff_unique_abbr": "TUGraz;York U", "aff_campus_unique_index": "1", "aff_campus_unique": ";Toronto", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Austria;Canada" }, { "title": "Spectral Learning of Dynamic Systems from Nonequilibrium Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7248", "id": "7248", "author_site": "Hao Wu, Frank Noe", "author": "Hao Wu; Frank Noe", "abstract": "Observable operator models (OOMs) and related models are among the most important and powerful tools for modeling and analyzing stochastic systems. They exactly describe the dynamics of finite-rank systems and can be efficiently and consistently estimated through spectral learning under the assumption of identically distributed data. In this paper, we investigate the properties of spectral learning without this assumption due to the requirements of analyzing large time-scale systems, and show that the equilibrium dynamics of a system can be extracted from nonequilibrium observation data by imposing an equilibrium constraint. 
In addition, we propose a binless extension of spectral learning for continuous data. In comparison with other continuous-valued spectral algorithms, the binless algorithm can achieve consistent estimation of equilibrium dynamics with only linear complexity.", "bibtex": "@inproceedings{NIPS2016_296472c9,\n author = {Wu, Hao and Noe, Frank},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Spectral Learning of Dynamic Systems from Nonequilibrium Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/296472c9542ad4d4788d543508116cbc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/296472c9542ad4d4788d543508116cbc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/296472c9542ad4d4788d543508116cbc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/296472c9542ad4d4788d543508116cbc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/296472c9542ad4d4788d543508116cbc-Reviews.html", "metareview": "", "pdf_size": 1352195, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13578169514407041492&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/296472c9542ad4d4788d543508116cbc-Abstract.html" }, { "title": "Split LBI: An Iterative Regularization Path with Structural Sparsity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7305", "id": "7305", "author_site": "Chendi Huang, Xinwei Sun, Jiechao Xiong, Yuan Yao", "author": "Chendi Huang; Xinwei Sun; Jiechao Xiong; Yuan Yao", "abstract": "An iterative regularization path with structural sparsity is proposed in this paper based on variable splitting and the Linearized Bregman Iteration, hence called \\emph{Split LBI}. Despite its simplicity, Split LBI outperforms the popular generalized Lasso in both theory and experiments. A theory of path consistency is presented, showing that, equipped with proper early stopping, Split LBI may achieve model selection consistency under a family of Irrepresentable Conditions which can be weaker than the necessary and sufficient condition for generalized Lasso. Furthermore, some $\\ell_2$ error bounds are also given at the minimax optimal rates. The utility and benefit of the algorithm are illustrated by applications on both traditional image denoising and a novel example on partial order ranking.", "bibtex": "@inproceedings{NIPS2016_24510415,\n author = {Huang, Chendi and Sun, Xinwei and Xiong, Jiechao and Yao, Yuan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Split LBI: An Iterative Regularization Path with Structural Sparsity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2451041557a22145b3701b0184109cab-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2451041557a22145b3701b0184109cab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2451041557a22145b3701b0184109cab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2451041557a22145b3701b0184109cab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2451041557a22145b3701b0184109cab-Reviews.html", "metareview": "", "pdf_size": 747240, "gs_citation": 26, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3398671434439112037&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Peking University; Peking University; Peking University; Hong Kong University of Science and Technology + Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;ust.hk", "email": "pku.edu.cn;pku.edu.cn;pku.edu.cn;ust.hk", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2451041557a22145b3701b0184109cab-Abstract.html", "aff_unique_index": "0;0;0;1+0", "aff_unique_norm": "Peking University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.ust.hk", "aff_unique_abbr": "Peking U;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0+0", "aff_country_unique": "China" }, { "title": "Statistical Inference for Cluster Trees", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7084", "id": "7084", "author_site": "Jisu KIM, Yen-Chi Chen, Sivaraman Balakrishnan, Alessandro Rinaldo, Larry Wasserman", "author": "Jisu KIM; Yen-Chi Chen; Sivaraman Balakrishnan; Alessandro Rinaldo; Larry Wasserman", "abstract": "A cluster tree provides an intuitive summary of a density function that reveals essential structure about the high-density clusters. The true cluster tree is estimated from a finite sample from an unknown true density. This paper addresses the basic question of quantifying our uncertainty by assessing the statistical significance of different features of an empirical cluster tree. We first study a variety of metrics that can be used to compare different trees, analyzing their properties and assessing their suitability for our inference task. We then propose methods to construct and summarize confidence sets for the unknown true cluster tree. We introduce a partial ordering on cluster trees which we use to prune some of the statistically insignificant features of the empirical tree, yielding interpretable and parsimonious cluster trees. Finally, we provide a variety of simulations to illustrate our proposed methods and furthermore demonstrate their utility in the analysis of a Graft-versus-Host Disease (GvHD) data set.", "bibtex": "@inproceedings{NIPS2016_a9b7ba70,\n author = {KIM, Jisu and Chen, Yen-Chi and Balakrishnan, Sivaraman and Rinaldo, Alessandro and Wasserman, Larry},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Statistical Inference for Cluster Trees},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/a9b7ba70783b617e9998dc4dd82eb3c5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/a9b7ba70783b617e9998dc4dd82eb3c5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/a9b7ba70783b617e9998dc4dd82eb3c5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/a9b7ba70783b617e9998dc4dd82eb3c5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/a9b7ba70783b617e9998dc4dd82eb3c5-Reviews.html", "metareview": "", "pdf_size": 920105, "gs_citation": 32, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3910874322094636560&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Statistics, Carnegie Mellon University, Pittsburgh, USA; Department of Statistics, University of Washington, Seattle, USA; Department of Statistics, Carnegie Mellon University, Pittsburgh, USA; Department of Statistics, Carnegie Mellon University, Pittsburgh, USA; Department of Statistics, Carnegie Mellon University, Pittsburgh, USA", "aff_domain": "andrew.cmu.edu;uw.edu;stat.cmu.edu;stat.cmu.edu;stat.cmu.edu", "email": "andrew.cmu.edu;uw.edu;stat.cmu.edu;stat.cmu.edu;stat.cmu.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/a9b7ba70783b617e9998dc4dd82eb3c5-Abstract.html", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Carnegie Mellon University;University of Washington", "aff_unique_dep": "Department of Statistics;Department of Statistics", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu", "aff_unique_abbr": "CMU;UW", "aff_campus_unique_index": "0;1;0;0;0", "aff_campus_unique": "Pittsburgh;Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Statistical Inference for Pairwise Graphical Models Using Score Matching", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7135", "id": "7135", "author_site": "Ming Yu, Mladen Kolar, Varun Gupta", "author": "Ming Yu; Mladen Kolar; Varun Gupta", "abstract": "Probabilistic graphical models have been widely used to model complex systems and aid scientific discoveries. As a result, there is a large body of literature focused on consistent model selection. However, scientists are often interested in understanding the uncertainty associated with the estimated parameters, which the current literature has not addressed thoroughly. In this paper, we propose a novel estimator for edge parameters for pairwise graphical models based on the Hyv\\\"arinen scoring rule. The Hyv\\\"arinen scoring rule is especially useful in cases where the normalizing constant cannot be obtained efficiently in a closed form. We prove that the estimator is $\\sqrt{n}$-consistent and asymptotically Normal. This result allows us to construct confidence intervals for edge parameters, as well as hypothesis tests. We establish our results under conditions that are typically assumed in the literature for consistent estimation. However, we do not require that the estimator consistently recovers the graph structure. In particular, we prove that the asymptotic distribution of the estimator is robust to model selection mistakes and uniformly valid for a large number of data-generating processes. 
We illustrate the validity of our estimator through extensive simulation studies.", "bibtex": "@inproceedings{NIPS2016_411ae1bf,\n author = {Yu, Ming and Kolar, Mladen and Gupta, Varun},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Statistical Inference for Pairwise Graphical Models Using Score Matching},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/411ae1bf081d1674ca6091f8c59a266f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/411ae1bf081d1674ca6091f8c59a266f-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/411ae1bf081d1674ca6091f8c59a266f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/411ae1bf081d1674ca6091f8c59a266f-Reviews.html", "metareview": "", "pdf_size": 499005, "gs_citation": 49, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15668970530170184559&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Chicago Booth School of Business; University of Chicago Booth School of Business; University of Chicago Booth School of Business", "aff_domain": "chicagobooth.edu;chicagobooth.edu;chicagobooth.edu", "email": "chicagobooth.edu;chicagobooth.edu;chicagobooth.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/411ae1bf081d1674ca6091f8c59a266f-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "Booth School of Business", "aff_unique_url": "https://www.chicagobooth.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Stein Variational Gradient Descent: A General Purpose Bayesian Inference Algorithm", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7009", "id": "7009", "author_site": "Qiang Liu, Dilin Wang", "author": "Qiang Liu; Dilin Wang", "abstract": "We propose a general purpose variational inference algorithm that forms a natural counterpart of gradient descent for optimization. Our method iteratively transports a set of particles to match the target distribution, by applying a form of functional gradient descent that minimizes the KL divergence. Empirical studies are performed on various real world models and datasets, on which our method is competitive with existing state-of-the-art methods. The derivation of our method is based on a new theoretical result that connects the derivative of KL divergence under smooth transforms with Stein\u2019s identity and a recently proposed kernelized Stein discrepancy, which is of independent interest.", "bibtex": "@inproceedings{NIPS2016_b3ba8f1b,\n author = {Liu, Qiang and Wang, Dilin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stein Variational Gradient Descent: A General Purpose Bayesian Inference Algorithm},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b3ba8f1bee1238a2f37603d90b58898d-Reviews.html", "metareview": "", "pdf_size": 477157, "gs_citation": 1374, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14137569249878560716&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science, Dartmouth College; Department of Computer Science, Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu", "email": "dartmouth.edu;dartmouth.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b3ba8f1bee1238a2f37603d90b58898d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Dartmouth College", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://dartmouth.edu", "aff_unique_abbr": "Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Gradient Geodesic MCMC Methods", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7171", "id": "7171", "author_site": "Chang Liu, Jun Zhu, Yang Song", "author": "Chang Liu; Jun Zhu; Yang Song", "abstract": "We propose two stochastic gradient MCMC methods for sampling from Bayesian posterior distributions defined on Riemann manifolds with a known geodesic flow, e.g. hyperspheres. Our methods are the first scalable sampling methods on these manifolds, with the aid of stochastic gradients. Novel dynamics are conceived and 2nd-order integrators are developed. By adopting embedding techniques and the geodesic integrator, the methods do not require a global coordinate system of the manifold and do not involve inner iterations. Synthetic experiments show the validity of the method, and its application to the challenging inference for spherical topic models indicates practical usability and efficiency.", "bibtex": "@inproceedings{NIPS2016_77f959f1,\n author = {Liu, Chang and Zhu, Jun and Song, Yang},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Gradient Geodesic MCMC Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/77f959f119f4fb2321e9ce801e2f5163-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/77f959f119f4fb2321e9ce801e2f5163-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/77f959f119f4fb2321e9ce801e2f5163-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/77f959f119f4fb2321e9ce801e2f5163-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/77f959f119f4fb2321e9ce801e2f5163-Reviews.html", "metareview": "", "pdf_size": 714728, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17598050742186686948&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/77f959f119f4fb2321e9ce801e2f5163-Abstract.html" }, { "title": "Stochastic Gradient MCMC with Stale Gradients", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6994", "id": "6994", "author_site": "Changyou Chen, Nan Ding, Chunyuan Li, Yizhe Zhang, Lawrence Carin", "author": "Changyou Chen; Nan Ding; Chunyuan Li; Yizhe Zhang; Lawrence Carin", "abstract": "Stochastic gradient MCMC (SG-MCMC) has played an important role in large-scale Bayesian learning, with well-developed theoretical convergence properties. In such applications of SG-MCMC, it is becoming increasingly popular to employ distributed systems, where stochastic gradients are computed based on some outdated parameters, yielding what are termed stale gradients. While stale gradients could be directly used in SG-MCMC, their impact on convergence properties has not been well studied. In this paper we develop theory to show that while the bias and MSE of an SG-MCMC algorithm depend on the staleness of stochastic gradients, its estimation variance (relative to the expected estimate, based on a prescribed number of samples) is independent of it. In a simple Bayesian distributed system with SG-MCMC, where stale gradients are computed asynchronously by a set of workers, our theory indicates a linear speedup on the decrease of estimation variance w.r.t. the number of workers. Experiments on synthetic data and deep neural networks validate our theory, demonstrating the effectiveness and scalability of SG-MCMC with stale gradients.", "bibtex": "@inproceedings{NIPS2016_ac796a52,\n author = {Chen, Changyou and Ding, Nan and Li, Chunyuan and Zhang, Yizhe and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Gradient MCMC with Stale Gradients},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ac796a52db3f16bbdb6557d3d89d1c5a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ac796a52db3f16bbdb6557d3d89d1c5a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ac796a52db3f16bbdb6557d3d89d1c5a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ac796a52db3f16bbdb6557d3d89d1c5a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ac796a52db3f16bbdb6557d3d89d1c5a-Reviews.html", "metareview": "", "pdf_size": 618877, "gs_citation": 34, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14806761461391467110&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Dept. of Electrical and Computer Engineering, Duke University, Durham, NC, USA+Google Inc., Venice, CA, USA; Google Inc., Venice, CA, USA; Dept. of Electrical and Computer Engineering, Duke University, Durham, NC, USA; Dept. of Electrical and Computer Engineering, Duke University, Durham, NC, USA; Dept. of Electrical and Computer Engineering, Duke University, Durham, NC, USA", "aff_domain": "duke.edu;google.com;duke.edu;duke.edu;duke.edu", "email": "duke.edu;google.com;duke.edu;duke.edu;duke.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ac796a52db3f16bbdb6557d3d89d1c5a-Abstract.html", "aff_unique_index": "0+1;1;0;0;0", "aff_unique_norm": "Duke University;Google", "aff_unique_dep": "Department of Electrical and Computer Engineering;Google Inc.", "aff_unique_url": "https://www.duke.edu;https://www.google.com", "aff_unique_abbr": "Duke;Google", "aff_campus_unique_index": "0+1;1;0;0;0", "aff_campus_unique": "Durham;Venice", "aff_country_unique_index": "0+0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Gradient Methods for Distributionally Robust Optimization with f-divergences", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7106", "id": "7106", "author_site": "Hongseok Namkoong, John Duchi", "author": "Hongseok Namkoong; John C. Duchi", "abstract": "We develop efficient solution methods for a robust empirical risk minimization problem designed to give calibrated confidence intervals on performance and provide optimal tradeoffs between bias and variance. Our methods apply to distributionally robust optimization problems proposed by Ben-Tal et al., which put more weight on observations inducing high loss via a worst-case approach over a non-parametric uncertainty set on the underlying data distribution. Our algorithm solves the resulting minimax problems with nearly the same computational cost of stochastic gradient descent through the use of several carefully designed data structures. For a sample of size n, the per-iteration cost of our method scales as O(log n), which allows us to give optimality certificates that distributionally robust optimization provides at little extra cost compared to empirical risk minimization and stochastic gradient methods.", "bibtex": "@inproceedings{NIPS2016_4588e674,\n author = {Namkoong, Hongseok and Duchi, John C},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Gradient Methods for Distributionally Robust Optimization with f-divergences},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4588e674d3f0faf985047d4c3f13ed0d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4588e674d3f0faf985047d4c3f13ed0d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4588e674d3f0faf985047d4c3f13ed0d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4588e674d3f0faf985047d4c3f13ed0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4588e674d3f0faf985047d4c3f13ed0d-Reviews.html", "metareview": "", "pdf_size": 795408, "gs_citation": 421, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8205814408702168147&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Stanford University; Stanford University", "aff_domain": "stanford.edu;stanford.edu", "email": "stanford.edu;stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4588e674d3f0faf985047d4c3f13ed0d-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Gradient Richardson-Romberg Markov Chain Monte Carlo", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6984", "id": "6984", "author_site": "Alain Durmus, Umut Simsekli, Eric Moulines, Roland Badeau, Ga\u00ebl RICHARD", "author": "Alain Durmus; Umut Simsekli; Eric Moulines; Roland Badeau; Ga\u00ebl RICHARD", "abstract": "Stochastic Gradient Markov Chain Monte Carlo (SG-MCMC) algorithms have become increasingly popular for Bayesian inference in large-scale applications. Even though these methods have proved useful in several scenarios, their performance is often limited by their bias. In this study, we propose a novel sampling algorithm that aims to reduce the bias of SG-MCMC while keeping the variance at a reasonable level. Our approach is based on a numerical sequence acceleration method, namely the Richardson-Romberg extrapolation, which simply boils down to running almost the same SG-MCMC algorithm twice in parallel with different step sizes. We illustrate our framework on the popular Stochastic Gradient Langevin Dynamics (SGLD) algorithm and propose a novel SG-MCMC algorithm referred to as Stochastic Gradient Richardson-Romberg Langevin Dynamics (SGRRLD). We provide formal theoretical analysis and show that SGRRLD is asymptotically consistent, satisfies a central limit theorem, and that its non-asymptotic bias and mean squared error can be bounded. Our results show that SGRRLD attains a higher rate of convergence than SGLD both in finite time and asymptotically, and that it achieves the theoretical accuracy of the methods that are based on higher-order integrators. We support our findings using both synthetic and real data experiments.", "bibtex": "@inproceedings{NIPS2016_03f54461,\n author = {Durmus, Alain and Simsekli, Umut and Moulines, Eric and Badeau, Roland and RICHARD, Ga\\\"{e}l},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. 
Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Gradient Richardson-Romberg Markov Chain Monte Carlo},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/03f544613917945245041ea1581df0c2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/03f544613917945245041ea1581df0c2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/03f544613917945245041ea1581df0c2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/03f544613917945245041ea1581df0c2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/03f544613917945245041ea1581df0c2-Reviews.html", "metareview": "", "pdf_size": 2106783, "gs_citation": 42, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4291247746278178618&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay, 75013, Paris, France; LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay, 75013, Paris, France; Centre de Math\u00e9matiques Appliqu\u00e9es, UMR 7641, \u00c9cole Polytechnique, France; LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay, 75013, Paris, France; LTCI, CNRS, T\u00e9l\u00e9com ParisTech, Universit\u00e9 Paris-Saclay, 75013, Paris, France", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/03f544613917945245041ea1581df0c2-Abstract.html", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "T\u00e9l\u00e9com ParisTech;Ecole Polytechnique", "aff_unique_dep": "LTCI;Centre de Mathematiques Appliquees", "aff_unique_url": "https://www.telecom-paris.fr;https://www.polytechnique.edu", "aff_unique_abbr": "TPT;Ecole Polytechnique", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "title": "Stochastic Multiple Choice Learning for Training Diverse Deep Ensembles", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7163", "id": "7163", "author_site": "Stefan Lee, Senthil Purushwalkam, Michael Cogswell, Viresh Ranjan, David Crandall, Dhruv Batra", "author": "Stefan Lee; Senthil Purushwalkam Shiva Prakash; Michael Cogswell; Viresh Ranjan; David Crandall; Dhruv Batra", "abstract": "Many practical perception systems exist within larger processes which often include interactions with users or additional components that are capable of evaluating the quality of predicted solutions. In these contexts, it is beneficial to provide these oracle mechanisms with multiple highly likely hypotheses rather than a single prediction. In this work, we pose the task of producing multiple outputs as a learning problem over an ensemble of deep networks -- introducing a novel stochastic gradient descent based approach to minimize the loss with respect to an oracle. Our method is simple to implement, agnostic to both architecture and loss function, and parameter-free. Our approach achieves lower oracle error compared to existing methods on a wide range of tasks and deep architectures. 
We also show qualitatively that solutions produced from our approach often provide interpretable representations of task ambiguity.", "bibtex": "@inproceedings{NIPS2016_20d135f0,\n author = {Lee, Stefan and Purushwalkam Shiva Prakash, Senthil and Cogswell, Michael and Ranjan, Viresh and Crandall, David and Batra, Dhruv},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Multiple Choice Learning for Training Diverse Deep Ensembles},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/20d135f0f28185b84a4cf7aa51f29500-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/20d135f0f28185b84a4cf7aa51f29500-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/20d135f0f28185b84a4cf7aa51f29500-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/20d135f0f28185b84a4cf7aa51f29500-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/20d135f0f28185b84a4cf7aa51f29500-Reviews.html", "metareview": "", "pdf_size": 2783970, "gs_citation": 233, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16821351862284773339&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Virginia Tech; Carnegie Mellon University; Virginia Tech; Virginia Tech; Indiana University; Virginia Tech", "aff_domain": "vt.edu;andrew.cmu.edu;vt.edu;vt.edu;indiana.edu;vt.edu", "email": "vt.edu;andrew.cmu.edu;vt.edu;vt.edu;indiana.edu;vt.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/20d135f0f28185b84a4cf7aa51f29500-Abstract.html", "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Virginia Tech;Carnegie Mellon University;Indiana University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vt.edu;https://www.cmu.edu;https://www.indiana.edu", "aff_unique_abbr": "VT;CMU;IU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Stochastic Online AUC Maximization", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7408", "id": "7408", "author_site": "Yiming Ying, Longyin Wen, Siwei Lyu", "author": "Yiming Ying; Longyin Wen; Siwei Lyu", "abstract": "Area under the ROC curve (AUC) is a metric widely used for measuring classification performance on imbalanced data. It is of theoretical and practical interest to develop online learning algorithms that maximize AUC for large-scale data. A specific challenge in developing an online AUC maximization algorithm is that the learning objective function is usually defined over a pair of training examples of opposite classes, and existing methods achieve on-line processing with higher space and time complexity. In this work, we propose a new stochastic online algorithm for AUC maximization. In particular, we show that AUC optimization can be equivalently formulated as a convex-concave saddle point problem. From this saddle representation, a stochastic online algorithm (SOLAM) is proposed which has time and space complexity of one datum. 
We establish theoretical convergence of SOLAM with high probability and demonstrate its effectiveness and efficiency on standard benchmark datasets.", "bibtex": "@inproceedings{NIPS2016_c52f1bd6,\n author = {Ying, Yiming and Wen, Longyin and Lyu, Siwei},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Online AUC Maximization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c52f1bd66cc19d05628bd8bf27af3ad6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c52f1bd66cc19d05628bd8bf27af3ad6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c52f1bd66cc19d05628bd8bf27af3ad6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c52f1bd66cc19d05628bd8bf27af3ad6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c52f1bd66cc19d05628bd8bf27af3ad6-Reviews.html", "metareview": "", "pdf_size": 419741, "gs_citation": 226, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5136593377133218240&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c52f1bd66cc19d05628bd8bf27af3ad6-Abstract.html" }, { "title": "Stochastic Optimization for Large-scale Optimal Transport", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8509", "id": "8509", "author_site": "Aude Genevay, Marco Cuturi, Gabriel Peyr\u00e9, Francis Bach", "author": "Aude Genevay; Marco Cuturi; Gabriel Peyr\u00e9; Francis Bach", "abstract": "Optimal transport (OT) defines a powerful framework to compare probability distributions in a geometrically faithful way. However, the practical impact of OT is still limited because of its computational burden. We propose a new class of stochastic optimization algorithms to cope with large-scale problems routinely encountered in machine learning applications. These methods are able to manipulate arbitrary distributions (either discrete or continuous) by simply requiring the ability to draw samples from them, which is the typical setup in high-dimensional learning problems. This alleviates the need to discretize these densities, while giving access to provably convergent methods that output the correct distance without discretization error. These algorithms rely on two main ideas: (a) the dual OT problem can be re-cast as the maximization of an expectation; (b) entropic regularization of the primal OT problem results in a smooth dual optimization problem which can be addressed with algorithms that have provably faster convergence. 
We instantiate these ideas in three different computational setups: (i) when comparing a discrete distribution to another, we show that incremental stochastic optimization schemes can beat the current state-of-the-art finite-dimensional OT solver (Sinkhorn's algorithm); (ii) when comparing a discrete distribution to a continuous density, a (semi-discrete) re-formulation of the dual program is amenable to averaged stochastic gradient descent, leading to better performance than approximately solving the problem by discretization; (iii) when dealing with two continuous densities, we propose a stochastic gradient descent over a reproducing kernel Hilbert space (RKHS). This is currently the only known method to solve this problem, and is more efficient than discretizing the two densities beforehand. We back up these claims on a set of discrete, semi-discrete and continuous benchmark problems.", "bibtex": "@inproceedings{NIPS2016_2a27b814,\n author = {Genevay, Aude and Cuturi, Marco and Peyr\\'{e}, Gabriel and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Optimization for Large-scale Optimal Transport},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2a27b8144ac02f67687f76782a3b5d8f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2a27b8144ac02f67687f76782a3b5d8f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2a27b8144ac02f67687f76782a3b5d8f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2a27b8144ac02f67687f76782a3b5d8f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2a27b8144ac02f67687f76782a3b5d8f-Reviews.html", "metareview": "", "pdf_size": 1058549, "gs_citation": 586, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5913614547489555553&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "CEREMADE, Universit\u00e9 Paris-Dauphine+INRIA \u2013 Mokaplan project-team; CREST, ENSAE+Universit\u00e9 Paris-Saclay; CNRS and DMA, \u00c9cole Normale Sup\u00e9rieure+INRIA \u2013 Mokaplan project-team; INRIA \u2013 Sierra project-team+DI, ENS", "aff_domain": "ceremade.dauphine.fr;ensae.fr;ens.fr;inria.fr", "email": "ceremade.dauphine.fr;ensae.fr;ens.fr;inria.fr", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2a27b8144ac02f67687f76782a3b5d8f-Abstract.html", "aff_unique_index": "0+1;2+3;4+1;1+5", "aff_unique_norm": "Universit\u00e9 Paris-Dauphine;INRIA;CREST;Universit\u00e9 Paris-Saclay;\u00c9cole Normale Sup\u00e9rieure;Ecole Normale Superieure", "aff_unique_dep": "CEREMADE;Mokaplan project-team;;;CNRS and DMA;DI", "aff_unique_url": "https://www.univ-paris-dauphine.fr;https://www.inria.fr;https://www.crest.fr;https://www.universite-paris-saclay.fr;https://www.ens.fr;https://www.ens.fr", "aff_unique_abbr": ";INRIA;CREST;UPSaclay;ENS;ENS", "aff_campus_unique_index": ";;;", "aff_campus_unique": "", "aff_country_unique_index": "0+0;0+0;0+0;0+0", "aff_country_unique": "France" }, { "title": "Stochastic Structured Prediction under Bandit Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7329", "id": "7329", "author_site": "Artem Sokolov, Julia Kreutzer, Stefan Riezler, Christopher Lo", "author": "Artem Sokolov; Julia Kreutzer; 
Stefan Riezler; Christopher Lo", "abstract": "Stochastic structured prediction under bandit feedback follows a learning protocol where on each of a sequence of iterations, the learner receives an input, predicts an output structure, and receives partial feedback in the form of a task loss evaluation of the predicted structure. We present applications of this learning scenario to convex and non-convex objectives for structured prediction and analyze them as stochastic first-order methods. We present an experimental evaluation on problems of natural language processing over exponential output spaces, and compare convergence speed across different objectives under the practical criterion of optimal task performance on development data and the optimization-theoretic criterion of minimal squared gradient norm. Best results under both criteria are obtained for a non-convex objective for pairwise preference learning under bandit feedback.", "bibtex": "@inproceedings{NIPS2016_795c7a7a,\n author = {Sokolov, Artem and Kreutzer, Julia and Riezler, Stefan and Lo, Christopher},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Structured Prediction under Bandit Feedback},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/795c7a7a5ec6b460ec00c5841019b9e9-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/795c7a7a5ec6b460ec00c5841019b9e9-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/795c7a7a5ec6b460ec00c5841019b9e9-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/795c7a7a5ec6b460ec00c5841019b9e9-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/795c7a7a5ec6b460ec00c5841019b9e9-Reviews.html", "metareview": "", "pdf_size": 348985, "gs_citation": 35, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4582790389870215935&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Amazon Development Center, Berlin, Germany + Computational Linguistics, Heidelberg University, Germany; Computational Linguistics, Heidelberg University, Germany; Department of Mathematics, Tufts University, Boston, MA, USA + Computational Linguistics, Heidelberg University, Germany; IWR, Heidelberg University, Germany + Computational Linguistics, Heidelberg University, Germany", "aff_domain": "cl.uni-heidelberg.de;cl.uni-heidelberg.de;gmail.com;cl.uni-heidelberg.de", "email": "cl.uni-heidelberg.de;cl.uni-heidelberg.de;gmail.com;cl.uni-heidelberg.de", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/795c7a7a5ec6b460ec00c5841019b9e9-Abstract.html", "aff_unique_index": "0+1;1;2+1;1+1", "aff_unique_norm": "Amazon;Heidelberg University;Tufts University", "aff_unique_dep": "Development Center;Computational Linguistics;Department of Mathematics", "aff_unique_url": "https://www.amazon.de;https://www.uni-heidelberg.de;https://www.tufts.edu", "aff_unique_abbr": "Amazon;Uni Heidelberg;Tufts", "aff_campus_unique_index": "0+1;1;2+1;1+1", "aff_campus_unique": "Berlin;Heidelberg;Boston", "aff_country_unique_index": "0+0;0;1+0;0+0", "aff_country_unique": "Germany;United States" }, { "title": "Stochastic Three-Composite Convex Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6964", "id": "6964", 
"author_site": "Alp Yurtsever, Bang Cong Vu, Volkan Cevher", "author": "Alp Yurtsever; Bang Cong Vu; Volkan Cevher", "abstract": "We propose a stochastic optimization method for the minimization of the sum of three convex functions, one of which has a Lipschitz continuous gradient as well as restricted strong convexity. Our approach is most suitable in the setting where it is computationally advantageous to process the smooth term in the decomposition with its stochastic gradient estimate and the other two functions separately with their proximal operators, such as doubly regularized empirical risk minimization problems. We prove the convergence characterization of the proposed algorithm in expectation under the standard assumptions for the stochastic gradient estimate of the smooth term. Our method operates in the primal space and can be considered as a stochastic extension of the three-operator splitting method. Finally, numerical evidence supports the effectiveness of our method in real-world problems.", "bibtex": "@inproceedings{NIPS2016_5d6646aa,\n author = {Yurtsever, Alp and Vu, Bang Cong and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Three-Composite Convex Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5d6646aad9bcc0be55b2c82f69750387-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5d6646aad9bcc0be55b2c82f69750387-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5d6646aad9bcc0be55b2c82f69750387-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5d6646aad9bcc0be55b2c82f69750387-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5d6646aad9bcc0be55b2c82f69750387-Reviews.html", "metareview": "", "pdf_size": 2409633, "gs_citation": 36, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3515195249567808927&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Laboratory for Information and Inference Systems (LIONS); Laboratory for Information and Inference Systems (LIONS); Laboratory for Information and Inference Systems (LIONS)", "aff_domain": "epfl.ch;epfl.ch;epfl.ch", "email": "epfl.ch;epfl.ch;epfl.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5d6646aad9bcc0be55b2c82f69750387-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Laboratory for Information and Inference Systems", "aff_unique_dep": "Information and Inference Systems", "aff_unique_url": "", "aff_unique_abbr": "LIONS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Stochastic Variance Reduction Methods for Saddle-Point Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7121", "id": "7121", "author_site": "Balamurugan Palaniappan, Francis Bach", "author": "Balamurugan Palaniappan; Francis Bach", "abstract": "We consider convex-concave saddle-point problems where the objective functions may be split in many components, and extend recent stochastic variance reduction methods (such as SVRG or SAGA) to provide the first large-scale linearly convergent algorithms for this class of problems which are common in machine learning. 
While the algorithmic extension is straightforward, it comes with challenges and opportunities: (a) the convex minimization analysis does not apply and we use the notion of monotone operators to prove convergence, showing in particular that the same algorithm applies to a larger class of problems, such as variational inequalities, (b) there are two notions of splits, in terms of functions, or in terms of partial derivatives, (c) the split does need to be done with convex-concave terms, (d) non-uniform sampling is key to an efficient algorithm, both in theory and practice, and (e) these incremental algorithms can be easily accelerated using a simple extension of the \"catalyst\" framework, leading to an algorithm which is always superior to accelerated batch algorithms.", "bibtex": "@inproceedings{NIPS2016_1aa48fc4,\n author = {Palaniappan, Balamurugan and Bach, Francis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Variance Reduction Methods for Saddle-Point Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1aa48fc4880bb0c9b8a3bf979d3b917e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1aa48fc4880bb0c9b8a3bf979d3b917e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1aa48fc4880bb0c9b8a3bf979d3b917e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1aa48fc4880bb0c9b8a3bf979d3b917e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1aa48fc4880bb0c9b8a3bf979d3b917e-Reviews.html", "metareview": "", "pdf_size": 382591, "gs_citation": 260, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3561479211517190210&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "INRIA - Ecole Normale Sup\u00e9rieure, Paris; INRIA - Ecole Normale Sup\u00e9rieure, Paris", "aff_domain": "inria.fr;ens.fr", "email": "inria.fr;ens.fr", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1aa48fc4880bb0c9b8a3bf979d3b917e-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "Ecole Normale Sup\u00e9rieure", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Paris", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "title": "Stochastic Variational Deep Kernel Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6960", "id": "6960", "author_site": "Andrew Wilson, Zhiting Hu, Russ Salakhutdinov, Eric Xing", "author": "Andrew G Wilson; Zhiting Hu; Ruslan Salakhutdinov; Eric P Xing", "abstract": "Deep kernel learning combines the non-parametric flexibility of kernel methods with the inductive biases of deep learning architectures. We propose a novel deep kernel learning model and stochastic variational inference procedure which generalizes deep kernel learning approaches to enable classification, multi-task learning, additive covariance structures, and stochastic gradient training. Specifically, we apply additive base kernels to subsets of output features from deep neural architectures, and jointly learn the parameters of the base kernels and deep network through a Gaussian process marginal likelihood objective. 
Within this framework, we derive an efficient form of stochastic variational inference which leverages local kernel interpolation, inducing points, and structure-exploiting algebra. We show improved performance over stand-alone deep networks, SVMs, and state-of-the-art scalable Gaussian processes on several classification benchmarks, including an airline delay dataset containing 6 million training points, CIFAR, and ImageNet.", "bibtex": "@inproceedings{NIPS2016_bcc0d400,\n author = {Wilson, Andrew G and Hu, Zhiting and Salakhutdinov, Russ R and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Stochastic Variational Deep Kernel Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/bcc0d400288793e8bdcd7c19a8ac0c2b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/bcc0d400288793e8bdcd7c19a8ac0c2b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/bcc0d400288793e8bdcd7c19a8ac0c2b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/bcc0d400288793e8bdcd7c19a8ac0c2b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/bcc0d400288793e8bdcd7c19a8ac0c2b-Reviews.html", "metareview": "", "pdf_size": 567492, "gs_citation": 355, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13827576791126009802&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/bcc0d400288793e8bdcd7c19a8ac0c2b-Abstract.html" }, { "title": "Strategic Attentive Writer for Learning Macro-Actions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6971", "id": "6971", "author_site": "Alexander (Sasha) Vezhnevets, Volodymyr Mnih, Simon Osindero, Alex Graves, Oriol Vinyals, John Agapiou, koray kavukcuoglu", "author": "Alexander Vezhnevets; Volodymyr Mnih; Simon Osindero; Alex Graves; Oriol Vinyals; John Agapiou; koray kavukcuoglu", "abstract": "We present a novel deep recurrent neural network architecture that learns to build implicit plans in an end-to-end manner purely by interacting with an environment in a reinforcement learning setting. The network builds an internal plan, which is continuously updated upon observation of the next input from the environment. It can also partition this internal representation into contiguous sub-sequences by learning for how long the plan can be committed to -- i.e. followed without replanning. Combining these properties, the proposed model, dubbed STRategic Attentive Writer (STRAW), can learn high-level, temporally abstracted macro-actions of varying lengths that are solely learnt from data without any prior information. These macro-actions enable both structured exploration and economic computation. We experimentally demonstrate that STRAW delivers strong improvements on several ATARI games by employing temporally extended planning strategies (e.g. Ms. Pacman and Frostbite). It is at the same time a general algorithm that can be applied to any sequence data. 
To that end, we also show that when trained on a text prediction task, STRAW naturally predicts frequent n-grams (instead of macro-actions), demonstrating the generality of the approach.", "bibtex": "@inproceedings{NIPS2016_c4492cbe,\n author = {Vezhnevets, Alexander and Mnih, Volodymyr and Osindero, Simon and Graves, Alex and Vinyals, Oriol and Agapiou, John and kavukcuoglu, koray},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Strategic Attentive Writer for Learning Macro-Actions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c4492cbe90fbdbf88a5aec486aa81ed5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c4492cbe90fbdbf88a5aec486aa81ed5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c4492cbe90fbdbf88a5aec486aa81ed5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c4492cbe90fbdbf88a5aec486aa81ed5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c4492cbe90fbdbf88a5aec486aa81ed5-Reviews.html", "metareview": "", "pdf_size": 718889, "gs_citation": 186, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12947444681718818134&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c4492cbe90fbdbf88a5aec486aa81ed5-Abstract.html", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Structure-Blind Signal Recovery", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7275", "id": "7275", "author_site": "Dmitry Ostrovsky, Zaid Harchaoui, Anatoli Juditsky, Arkadi S Nemirovski", "author": "Dmitry Ostrovsky; Zaid Harchaoui; Anatoli Juditsky; Arkadi S. Nemirovski", "abstract": "We consider the problem of recovering a signal observed in Gaussian noise. If the set of signals is convex and compact, and can be specified beforehand, one can use classical linear estimators that achieve a risk within a constant factor of the minimax risk. However, when the set is unspecified, designing an estimator that is blind to the hidden structure of the signal remains a challenging problem. We propose a new family of estimators to recover signals observed in Gaussian noise. Instead of specifying the set where the signal lives, we assume the existence of a well-performing linear estimator. The proposed estimators enjoy exact oracle inequalities and can be efficiently computed through convex optimization. 
We present several numerical illustrations that show the potential of the approach.", "bibtex": "@inproceedings{NIPS2016_2f4fe03d,\n author = {Ostrovsky, Dmitry and Harchaoui, Zaid and Juditsky, Anatoli and Nemirovski, Arkadi S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structure-Blind Signal Recovery},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2f4fe03d77724a7217006e5d16728874-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2f4fe03d77724a7217006e5d16728874-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2f4fe03d77724a7217006e5d16728874-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2f4fe03d77724a7217006e5d16728874-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2f4fe03d77724a7217006e5d16728874-Reviews.html", "metareview": "", "pdf_size": 388319, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10948131011879686266&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 15, "aff": "LJK, University of Grenoble Alpes; University of Washington; LJK, University of Grenoble Alpes; Georgia Institute of Technology", "aff_domain": "imag.fr;washington.edu;imag.fr;gatech.edu", "email": "imag.fr;washington.edu;imag.fr;gatech.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2f4fe03d77724a7217006e5d16728874-Abstract.html", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Grenoble Alpes;University of Washington;Georgia Institute of Technology", "aff_unique_dep": "LJK;;", "aff_unique_url": "https://www.univ-grenoble-alpes.fr;https://www.washington.edu;https://www.gatech.edu", "aff_unique_abbr": "UGA;UW;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "France;United States" }, { "title": "Structured Matrix Recovery via the Generalized Dantzig Selector", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7079", "id": "7079", "author_site": "Sheng Chen, Arindam Banerjee", "author": "Sheng Chen; Arindam Banerjee", "abstract": "In recent years, structured matrix recovery problems have gained considerable attention for their real-world applications, such as recommender systems and computer vision. Much of the existing work has focused on matrices with low-rank structure, and limited progress has been made on matrices with other types of structure. In this paper we present a non-asymptotic analysis for estimation of generally structured matrices via the generalized Dantzig selector based on sub-Gaussian measurements. We show that the estimation error can always be succinctly expressed in terms of a few geometric measures such as Gaussian widths of suitable sets associated with the structure of the underlying true matrix. Further, we derive general bounds on these geometric measures for structures characterized by unitarily invariant norms, a large family covering most matrix norms of practical interest. Examples are provided to illustrate the utility of our theoretical development.", "bibtex": "@inproceedings{NIPS2016_806beafe,\n author = {Chen, Sheng and Banerjee, Arindam},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structured Matrix Recovery via the Generalized Dantzig Selector},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/806beafe154032a5b818e97b4420ad98-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/806beafe154032a5b818e97b4420ad98-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/806beafe154032a5b818e97b4420ad98-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/806beafe154032a5b818e97b4420ad98-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/806beafe154032a5b818e97b4420ad98-Reviews.html", "metareview": "", "pdf_size": 325329, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1664822637361102237&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Dept. of Computer Science & Engineering, University of Minnesota, Twin Cities; Dept. of Computer Science & Engineering, University of Minnesota, Twin Cities", "aff_domain": "cs.umn.edu;cs.umn.edu", "email": "cs.umn.edu;cs.umn.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/806beafe154032a5b818e97b4420ad98-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "Department of Computer Science & Engineering", "aff_unique_url": "https://www.umn.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Twin Cities", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Structured Prediction Theory Based on Factor Graph Complexity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7294", "id": "7294", "author_site": "Corinna Cortes, Vitaly Kuznetsov, Mehryar Mohri, Scott Yang", "author": "Corinna Cortes; Vitaly Kuznetsov; Mehryar Mohri; Scott Yang", "abstract": "We present a general theoretical analysis of structured prediction with a series of new results. We give new data-dependent margin guarantees for structured prediction for a very wide family of loss functions and a general family of hypotheses, with an arbitrary factor graph decomposition. These are the tightest margin bounds known for both standard multi-class and general structured prediction problems. Our guarantees are expressed in terms of a data-dependent complexity measure, \\emph{factor graph complexity}, which we show can be estimated from data and bounded in terms of familiar quantities for several commonly used hypothesis sets, and a sparsity measure for features and graphs. Our proof techniques include generalizations of Talagrand's contraction lemma that can be of independent interest. We further extend our theory by leveraging the principle of Voted Risk Minimization (VRM) and show that learning is possible even with complex factor graphs. We present new learning bounds for this advanced setting, which we use to devise two new algorithms, \\emph{Voted Conditional Random Field} (VCRF) and \\emph{Voted Structured Boosting} (StructBoost). These algorithms can make use of complex features and factor graphs and yet benefit from favorable learning guarantees. 
We also report the results of experiments with VCRF on several datasets to validate our theory.", "bibtex": "@inproceedings{NIPS2016_535ab766,\n author = {Cortes, Corinna and Kuznetsov, Vitaly and Mohri, Mehryar and Yang, Scott},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structured Prediction Theory Based on Factor Graph Complexity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/535ab76633d94208236a2e829ea6d888-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/535ab76633d94208236a2e829ea6d888-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/535ab76633d94208236a2e829ea6d888-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/535ab76633d94208236a2e829ea6d888-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/535ab76633d94208236a2e829ea6d888-Reviews.html", "metareview": "", "pdf_size": 1250640, "gs_citation": 74, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10645563053444545449&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Google Research, New York, NY 10011; Google Research, New York, NY 10011; Courant Institute and Google, New York, NY 10012; Courant Institute, New York, NY 10012", "aff_domain": "google.com;cims.nyu.edu;cims.nyu.edu;cims.nyu.edu", "email": "google.com;cims.nyu.edu;cims.nyu.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/535ab76633d94208236a2e829ea6d888-Abstract.html", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Google;Courant Institute of Mathematical Sciences", "aff_unique_dep": "Google Research;Mathematical Sciences", "aff_unique_url": "https://research.google;https://courant.nyu.edu", "aff_unique_abbr": "Google;Courant", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "New York", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Structured Sparse Regression via Greedy Hard Thresholding", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7386", "id": "7386", "author_site": "Prateek Jain, Nikhil Rao, Inderjit Dhillon", "author": "Prateek Jain; Nikhil Rao; Inderjit S Dhillon", "abstract": "Several learning applications require solving high-dimensional regression problems where the relevant features belong to a small number of (overlapping) groups. For very large datasets and under standard sparsity constraints, hard thresholding methods have proven to be extremely efficient, but such methods require NP hard projections when dealing with overlapping groups. In this paper, we show that such NP-hard projections can not only be avoided by appealing to submodular optimization, but such methods come with strong theoretical guarantees even in the presence of poorly conditioned data (i.e. say when two features have correlation $\\geq 0.99$), which existing analyses cannot handle. These methods exhibit an interesting computation-accuracy trade-off and can be extended to significantly harder problems such as sparse overlapping groups. 
Experiments on both real and synthetic data validate our claims and demonstrate that the proposed methods are orders of magnitude faster than other greedy and convex relaxation techniques for learning with group-structured sparsity.", "bibtex": "@inproceedings{NIPS2016_8e82ab72,\n author = {Jain, Prateek and Rao, Nikhil and Dhillon, Inderjit S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Structured Sparse Regression via Greedy Hard Thresholding},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8e82ab7243b7c66d768f1b8ce1c967eb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8e82ab7243b7c66d768f1b8ce1c967eb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8e82ab7243b7c66d768f1b8ce1c967eb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8e82ab7243b7c66d768f1b8ce1c967eb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8e82ab7243b7c66d768f1b8ce1c967eb-Reviews.html", "metareview": "", "pdf_size": 858712, "gs_citation": 47, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8830384887763721968&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8e82ab7243b7c66d768f1b8ce1c967eb-Abstract.html" }, { "title": "Sub-sampled Newton Methods with Non-uniform Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7191", "id": "7191", "author_site": "Peng Xu, Jiyan Yang, Farbod Roosta-Khorasani, Christopher R\u00e9, Michael Mahoney", "author": "Peng Xu; Jiyan Yang; Fred Roosta; Christopher R\u00e9; Michael W. Mahoney", "abstract": "We consider the problem of finding the minimizer of a convex function $F: \mathbb R^d \rightarrow \mathbb R$ of the form $F(w) := \sum_{i=1}^n f_i(w) + R(w)$ where a low-rank factorization of $\nabla^2 f_i(w)$ is readily available. We consider the regime where $n \gg d$. We propose randomized Newton-type algorithms that exploit \textit{non-uniform} sub-sampling of $\{\nabla^2 f_i(w)\}_{i=1}^{n}$, as well as inexact updates, as means to reduce the computational complexity, and are applicable to a wide range of problems in machine learning. Two non-uniform sampling distributions based on {\it block norm squares} and {\it block partial leverage scores} are considered. Under certain assumptions, we show that our algorithms inherit a linear-quadratic convergence rate in $w$ and achieve a lower computational complexity compared to similar existing methods. In addition, we show that our algorithms exhibit more robustness and better dependence on problem specific quantities, such as the condition number. We numerically demonstrate the advantages of our algorithms on several real datasets.", "bibtex": "@inproceedings{NIPS2016_55c567fd,\n author = {Xu, Peng and Yang, Jiyan and Roosta, Fred and R\\'{e}, Christopher and Mahoney, Michael W},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sub-sampled Newton Methods with Non-uniform Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/55c567fd4395ecef6d936cf77b8d5b2b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/55c567fd4395ecef6d936cf77b8d5b2b-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/55c567fd4395ecef6d936cf77b8d5b2b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/55c567fd4395ecef6d936cf77b8d5b2b-Reviews.html", "metareview": "", "pdf_size": 493776, "gs_citation": 151, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9982836633864864413&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Stanford University; Stanford University; University of California at Berkeley; Stanford University; University of California at Berkeley", "aff_domain": "stanford.edu;stanford.edu;icsi.berkeley.edu;cs.stanford.edu;stat.berkeley.edu", "email": "stanford.edu;stanford.edu;icsi.berkeley.edu;cs.stanford.edu;stat.berkeley.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/55c567fd4395ecef6d936cf77b8d5b2b-Abstract.html", "aff_unique_index": "0;0;1;0;1", "aff_unique_norm": "Stanford University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.berkeley.edu", "aff_unique_abbr": "Stanford;UC Berkeley", "aff_campus_unique_index": "0;0;1;0;1", "aff_campus_unique": "Stanford;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Sublinear Time Orthogonal Tensor Decomposition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7155", "id": "7155", "author_site": "Zhao Song, David Woodruff, Huan Zhang", "author": "Zhao Song; David Woodruff; Huan Zhang", "abstract": "A recent work (Wang et al., NIPS 2015) gives the fastest known algorithms for orthogonal tensor decomposition with provable guarantees. Their algorithm is based on computing sketches of the input tensor, which requires reading the entire input. We show in a number of cases one can achieve the same theoretical guarantees in sublinear time, i.e., even without reading most of the input tensor. Instead of using sketches to estimate inner products in tensor decomposition algorithms, we use importance sampling. To achieve sublinear time, we need to know the norms of tensor slices, and we show how to do this in a number of important cases. For symmetric tensors $T = \sum_{i=1}^k \lambda_i u_i^{\otimes p}$ with $\lambda_i > 0$ for all $i$, we estimate such norms in sublinear time whenever $p$ is even. For the important case of $p = 3$ and small values of $k$, we can also estimate such norms. For asymmetric tensors sublinear time is not possible in general, but we show that if the tensor slice norms are just slightly below $\| T \|_F$ then sublinear time is again possible. One of the main strengths of our work is empirical: in a number of cases our algorithm is orders of magnitude faster than existing methods with the same accuracy.", "bibtex": "@inproceedings{NIPS2016_25ddc0f8,\n author = {Song, Zhao and Woodruff, David and Zhang, Huan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Sublinear Time Orthogonal Tensor Decomposition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Reviews.html", "metareview": "", "pdf_size": 516280, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13346902829932698795&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Dept. of Computer Science, University of Texas, Austin, USA; IBM Almaden Research Center, San Jose, USA; Dept. of Electrical and Computer Engineering, University of California, Davis, USA", "aff_domain": "utexas.edu;us.ibm.com;ucdavis.edu", "email": "utexas.edu;us.ibm.com;ucdavis.edu", "github": "", "project": "https://arxiv.org/", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/25ddc0f8c9d3e22e03d3076f98d83cb2-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Texas at Austin;IBM;University of California, Davis", "aff_unique_dep": "Department of Computer Science;IBM Almaden Research Center;Dept. of Electrical and Computer Engineering", "aff_unique_url": "https://www.utexas.edu;https://www.ibm.com/research/almaden;https://www.ucdavis.edu", "aff_unique_abbr": "UT Austin;IBM ARC;UC Davis", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Austin;San Jose;Davis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Supervised Learning with Tensor Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7064", "id": "7064", "author_site": "Edwin M Stoudenmire, David Schwab", "author": "Edwin Stoudenmire; David J Schwab", "abstract": "Tensor networks are approximations of high-order tensors which are efficient to work with and have been very successful for physics and mathematics applications. We demonstrate how algorithms for optimizing tensor networks can be adapted to supervised learning tasks by using matrix product states (tensor trains) to parameterize non-linear kernel learning models. For the MNIST data set we obtain less than 1% test set classification error. We discuss an interpretation of the additional structure imparted by the tensor network to the learned model.", "bibtex": "@inproceedings{NIPS2016_5314b967,\n author = {Stoudenmire, Edwin and Schwab, David J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Supervised Learning with Tensor Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5314b9674c86e3f9d1ba25ef9bb32895-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5314b9674c86e3f9d1ba25ef9bb32895-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5314b9674c86e3f9d1ba25ef9bb32895-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5314b9674c86e3f9d1ba25ef9bb32895-Reviews.html", "metareview": "", "pdf_size": 1200891, "gs_citation": 705, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7347813007666790273&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Perimeter Institute for Theoretical Physics, Waterloo, Ontario, N2L 2Y5, Canada; Department of Physics, Northwestern University, Evanston, IL", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5314b9674c86e3f9d1ba25ef9bb32895-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Perimeter Institute for Theoretical Physics;Northwestern University", "aff_unique_dep": ";Department of Physics", "aff_unique_url": "https://www.perimeterinstitute.ca/;https://www.northwestern.edu", "aff_unique_abbr": "PI;NU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Waterloo;Evanston", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "title": "Supervised Word Mover's Distance", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7422", "id": "7422", "author_site": "Gao Huang, Chuan Guo, Matt J Kusner, Yu Sun, Fei Sha, Kilian Weinberger", "author": "Gao Huang; Chuan Guo; Matt J Kusner; Yu Sun; Fei Sha; Kilian Q. Weinberger", "abstract": "Accurately measuring the similarity between text documents lies at the core of many real world applications of machine learning. These include web-search ranking, document recommendation, multi-lingual document matching, and article categorization. Recently, a new document metric, the word mover's distance (WMD), has been proposed with unprecedented results on kNN-based document classification. The WMD elevates high quality word embeddings to document metrics by formulating the distance between two documents as an optimal transport problem between the embedded words. However, the document distances are entirely unsupervised and lack a mechanism to incorporate supervision when available. In this paper we propose an efficient technique to learn a supervised metric, which we call the Supervised WMD (S-WMD) metric. Our algorithm learns document distances that measure the underlying semantic differences between documents by leveraging semantic differences between individual words discovered during supervised training. This is achieved with a linear transformation of the underlying word embedding space and tailored word-specific weights, learned to minimize the stochastic leave-one-out nearest neighbor classification error on a per-document level. 
We evaluate our metric on eight real-world text classification tasks on which S-WMD consistently outperforms almost all of our 26 competitive baselines.", "bibtex": "@inproceedings{NIPS2016_10c66082,\n author = {Huang, Gao and Guo, Chuan and Kusner, Matt J and Sun, Yu and Sha, Fei and Weinberger, Kilian Q},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Supervised Word Mover\\textquotesingle s Distance},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/10c66082c124f8afe3df4886f5e516e0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/10c66082c124f8afe3df4886f5e516e0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/10c66082c124f8afe3df4886f5e516e0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/10c66082c124f8afe3df4886f5e516e0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/10c66082c124f8afe3df4886f5e516e0-Reviews.html", "metareview": "", "pdf_size": 1358317, "gs_citation": 264, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9307065558981927940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Cornell University; Cornell University; Alan Turing Institute, University of Warwick; Cornell University; Cornell University; University of California, Los Angeles", "aff_domain": "cornell.edu;cornell.edu;turing.ac.uk;cornell.edu;cornell.edu;cs.ucla.edu", "email": "cornell.edu;cornell.edu;turing.ac.uk;cornell.edu;cornell.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/10c66082c124f8afe3df4886f5e516e0-Abstract.html", "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Cornell University;University of Warwick;University of California, Los Angeles", "aff_unique_dep": ";Alan Turing Institute;", "aff_unique_url": "https://www.cornell.edu;https://warwick.ac.uk;https://www.ucla.edu", "aff_unique_abbr": "Cornell;Warwick;UCLA", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Warwick;Los Angeles", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "Supervised learning through the lens of compression", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7217", "id": "7217", "author_site": "Ofir David, Shay Moran, Amir Yehudayoff", "author": "Ofir David; Shay Moran; Amir Yehudayoff", "abstract": "This work continues the study of the relationship between sample compression schemes and statistical learning, which has been mostly investigated within the framework of binary classification. We first extend the investigation to multiclass categorization: we prove that in this case learnability is equivalent to compression of logarithmic sample size and that the uniform convergence property implies compression of constant size. We use the compressibility-learnability equivalence to show that (i) for multiclass categorization, PAC and agnostic PAC learnability are equivalent, and (ii) to derive a compactness theorem for learnability. We then consider supervised learning under general loss functions: we show that in this case, in order to maintain the compressibility-learnability equivalence, it is necessary to consider an approximate variant of compression. 
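For a fixed linear map and word weights, evaluating the S-WMD described above reduces to a small optimal-transport linear program between the two documents' word histograms. A minimal sketch that solves that LP exactly with scipy (in S-WMD the map A and per-word weights reweighting the histograms are learned, and the paper uses faster relaxed solvers):

```python
import numpy as np
from scipy.optimize import linprog

def swmd_distance(X, Y, a, b, A):
    """Optimal-transport document distance for a *fixed* linear map A.

    X (n, d), Y (m, d): embeddings of the two documents' words;
    a, b: word-weight histograms (each summing to one). A = identity
    recovers the unsupervised WMD; S-WMD learns A and the word weights
    by descending the leave-one-out kNN loss (not shown here)."""
    n, m = len(a), len(b)
    diff = (X @ A.T)[:, None, :] - (Y @ A.T)[None, :, :]
    cost = np.sqrt((diff ** 2).sum(-1)).ravel()          # pairwise word costs
    row = np.zeros((n, n * m)); col = np.zeros((m, n * m))
    for i in range(n): row[i, i * m:(i + 1) * m] = 1.0   # row sums equal a
    for j in range(m): col[j, j::m] = 1.0                # column sums equal b
    res = linprog(cost, A_eq=np.vstack([row, col[:-1]]), # drop 1 redundant row
                  b_eq=np.concatenate([a, b[:-1]]),
                  bounds=(0, None), method="highs")
    return res.fun

rng = np.random.default_rng(0)
X, Y = rng.standard_normal((5, 20)), rng.standard_normal((4, 20))
print(swmd_distance(X, Y, np.full(5, 0.2), np.full(4, 0.25), np.eye(20)))
```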
We use it to show that PAC and agnostic PAC are not equivalent, even when the loss function has only three values.", "bibtex": "@inproceedings{NIPS2016_59f51fd6,\n author = {David, Ofir and Moran, Shay and Yehudayoff, Amir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Supervised learning through the lens of compression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/59f51fd6937412b7e56ded1ea2470c25-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/59f51fd6937412b7e56ded1ea2470c25-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/59f51fd6937412b7e56ded1ea2470c25-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/59f51fd6937412b7e56ded1ea2470c25-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/59f51fd6937412b7e56ded1ea2470c25-Reviews.html", "metareview": "", "pdf_size": 310671, "gs_citation": 55, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13176449780102640271&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 2, "aff": "Department of Mathematics, Technion - Israel Institute of Technology; Department of Computer Science, Technion - Israel Institute of Technology; Department of Mathematics, Technion - Israel Institute of Technology", "aff_domain": "tx.technion.ac.il;cs.technion.ac.il;gmail.com", "email": "tx.technion.ac.il;cs.technion.ac.il;gmail.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/59f51fd6937412b7e56ded1ea2470c25-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "Department of Mathematics", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "title": "Swapout: Learning an ensemble of deep architectures", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7348", "id": "7348", "author_site": "Saurabh Singh, Derek Hoiem, David Forsyth", "author": "Saurabh Singh; Derek Hoiem; David Forsyth", "abstract": "We describe Swapout, a new stochastic training method that outperforms ResNets of identical network structure, yielding impressive results on CIFAR-10 and CIFAR-100. Swapout samples from a rich set of architectures including dropout, stochastic depth and residual architectures as special cases. When viewed as a regularization method, swapout not only inhibits co-adaptation of units in a layer, similar to dropout, but also across network layers. We conjecture that swapout achieves strong regularization by implicitly tying the parameters across layers. When viewed as an ensemble training method, it samples a much richer set of architectures than existing methods such as dropout or stochastic depth. We propose a parameterization that reveals connections to existing architectures and suggests a much richer set of architectures to be explored. We show that our formulation suggests an efficient training method and validate our conclusions on CIFAR-10 and CIFAR-100, matching state-of-the-art accuracy. 
Remarkably, our 32 layer wider model performs similarly to a 1001 layer ResNet model.", "bibtex": "@inproceedings{NIPS2016_c51ce410,\n author = {Singh, Saurabh and Hoiem, Derek and Forsyth, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Swapout: Learning an ensemble of deep architectures},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c51ce410c124a10e0db5e4b97fc2af39-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c51ce410c124a10e0db5e4b97fc2af39-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c51ce410c124a10e0db5e4b97fc2af39-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c51ce410c124a10e0db5e4b97fc2af39-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c51ce410c124a10e0db5e4b97fc2af39-Reviews.html", "metareview": "", "pdf_size": 455417, "gs_citation": 186, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17737070486654210820&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Illinois, Urbana-Champaign; Department of Computer Science, University of Illinois, Urbana-Champaign; Department of Computer Science, University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu", "email": "illinois.edu;illinois.edu;illinois.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c51ce410c124a10e0db5e4b97fc2af39-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois, Urbana-Champaign", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Synthesis of MCMC and Belief Propagation", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7210", "id": "7210", "author_site": "Sungsoo Ahn, Michael Chertkov, Jinwoo Shin", "author": "Sung-Soo Ahn; Michael Chertkov; Jinwoo Shin", "abstract": "Markov Chain Monte Carlo (MCMC) and Belief Propagation (BP) are the most popular algorithms for computational inference in Graphical Models (GM). In principle, MCMC is an exact probabilistic method which, however, often suffers from exponentially slow mixing. In contrast, BP is a deterministic method which is typically fast and empirically very successful, but in general lacks control of accuracy over loopy graphs. In this paper, we introduce MCMC algorithms correcting the approximation error of BP, i.e., we provide a way to compensate for BP errors via a consecutive BP-aware MCMC. Our framework is based on the Loop Calculus (LC) approach which allows us to express the BP error as a sum of weighted generalized loops. Although the full series is computationally intractable, it is known that a truncated series, summing up all 2-regular loops, is computable in polynomial-time for planar pair-wise binary GMs and it also provides a highly accurate approximation empirically. Motivated by this, we first propose a polynomial-time approximation MCMC scheme for the truncated series of general (non-planar) pair-wise binary models. 
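The swapout unit described in the Swapout abstract above is a single line: y = Theta1 * x + Theta2 * F(x) with independent Bernoulli masks. A minimal sketch covering the special cases named there (the deterministic test-time rule below uses plain expectations; the paper also discusses stochastic inference):

```python
import numpy as np

def swapout(x, Fx, p1, p2, rng, train=True):
    """Swapout unit: y = theta1 * x + theta2 * F(x), theta ~ Bernoulli.

    theta1 = theta2 = 1 recovers a residual unit; theta1 = 0 recovers
    dropout on F(x); sharing one Bernoulli per layer recovers stochastic
    depth. At test time this sketch replaces the masks by their means."""
    if not train:
        return p1 * x + p2 * Fx
    t1 = rng.random(x.shape) < p1        # per-unit Bernoulli masks
    t2 = rng.random(x.shape) < p2
    return t1 * x + t2 * Fx

rng = np.random.default_rng(0)
x = rng.standard_normal(8)
Fx = np.tanh(x)                          # stand-in for a residual branch
print(swapout(x, Fx, 0.8, 0.8, rng))
print(swapout(x, Fx, 0.8, 0.8, rng, train=False))
```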
Our main idea here is to use the Worm algorithm, known to provide fast mixing in other (related) problems, and then design an appropriate rejection scheme to sample 2-regular loops. Furthermore, we design an efficient rejection-free MCMC scheme for approximating the full series. The main novelty underlying our design is in utilizing the concept of cycle basis, which provides an efficient decomposition of the generalized loops. In essence, the proposed MCMC schemes run on a transformed GM built upon the non-trivial BP solution, and our experiments show that this synthesis of BP and MCMC outperforms both direct MCMC and bare BP schemes.", "bibtex": "@inproceedings{NIPS2016_3b5dca50,\n author = {Ahn, Sung-Soo and Chertkov, Michael and Shin, Jinwoo},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Synthesis of MCMC and Belief Propagation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3b5dca501ee1e6d8cd7b905f4e1bf723-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3b5dca501ee1e6d8cd7b905f4e1bf723-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3b5dca501ee1e6d8cd7b905f4e1bf723-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3b5dca501ee1e6d8cd7b905f4e1bf723-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3b5dca501ee1e6d8cd7b905f4e1bf723-Reviews.html", "metareview": "", "pdf_size": 543545, "gs_citation": 13, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7207083745159207626&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "School of Electrical Engineering, Korea Advanced Institute of Science and Technology, Daejeon, Korea + Skolkovo Institute of Science and Technology, 143026 Moscow, Russia; Theoretical Division, T-4 & Center for Nonlinear Studies, Los Alamos National Laboratory, Los Alamos, NM 87545, USA + Skolkovo Institute of Science and Technology, 143026 Moscow, Russia; School of Electrical Engineering, Korea Advanced Institute of Science and Technology, Daejeon, Korea", "aff_domain": "kaist.ac.kr;lanl.gov;kaist.ac.kr", "email": "kaist.ac.kr;lanl.gov;kaist.ac.kr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3b5dca501ee1e6d8cd7b905f4e1bf723-Abstract.html", "aff_unique_index": "0+1;2+1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Skolkovo Institute of Science and Technology;Los Alamos National Laboratory", "aff_unique_dep": "School of Electrical Engineering;;Theoretical Division, T-4 & Center for Nonlinear Studies", "aff_unique_url": "https://www.kaist.ac.kr;https://www.skoltech.ru;https://www.lanl.gov", "aff_unique_abbr": "KAIST;Skoltech;LANL", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Daejeon;;Los Alamos", "aff_country_unique_index": "0+1;2+1;0", "aff_country_unique": "South Korea;Russian Federation;United States" }, { "title": "Synthesizing the preferred inputs for neurons in neural networks via deep generator networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6926", "id": "6926", "author_site": "Anh Nguyen, Alexey Dosovitskiy, Jason Yosinski, Thomas Brox, Jeff Clune", "author": "Anh Nguyen; Alexey Dosovitskiy; Jason Yosinski; Thomas Brox; Jeff Clune", "abstract": "Deep neural networks 
(DNNs) have demonstrated state-of-the-art results on many pattern recognition tasks, especially vision classification problems. Understanding the inner workings of such computational brains is both fascinating basic science that is interesting in its own right---similar to why we study the human brain---and will enable researchers to further improve DNNs. One path to understanding how a neural network functions internally is to study what each of its neurons has learned to detect. One such method is called activation maximization, which synthesizes an input (e.g. an image) that highly activates a neuron. Here we dramatically improve the qualitative state of the art of activation maximization by harnessing a powerful, learned prior: a deep generator network. The algorithm (1) generates qualitatively state-of-the-art synthetic images that look almost real, (2) reveals the features learned by each neuron in an interpretable way, (3) generalizes well to new datasets and somewhat well to different network architectures without requiring the prior to be relearned, and (4) can be considered as a high-quality generative method (in this case, by generating novel, creative, interesting, recognizable images).", "bibtex": "@inproceedings{NIPS2016_5d79099f,\n author = {Nguyen, Anh and Dosovitskiy, Alexey and Yosinski, Jason and Brox, Thomas and Clune, Jeff},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Synthesizing the preferred inputs for neurons in neural networks via deep generator networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5d79099fcdf499f12b79770834c0164a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5d79099fcdf499f12b79770834c0164a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5d79099fcdf499f12b79770834c0164a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5d79099fcdf499f12b79770834c0164a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5d79099fcdf499f12b79770834c0164a-Reviews.html", "metareview": "", "pdf_size": 3258759, "gs_citation": 907, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5242797434107730911&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "University of Wyoming; University of Freiburg; Geometric Intelligence; University of Freiburg; University of Wyoming", "aff_domain": "uwyo.edu;cs.uni-freiburg.de;geometric.ai;cs.uni-freiburg.de;uwyo.edu", "email": "uwyo.edu;cs.uni-freiburg.de;geometric.ai;cs.uni-freiburg.de;uwyo.edu", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5d79099fcdf499f12b79770834c0164a-Abstract.html", "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Wyoming;University of Freiburg;Geometric Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uwyo.edu;https://www.uni-freiburg.de;https://geometricintelligence.com", "aff_unique_abbr": "UW;UoF;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0", "aff_country_unique": "United States;Germany;United Kingdom" }, { "title": "Tagger: Deep Unsupervised Perceptual Grouping", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7288", "id": "7288", "author_site": "Klaus Greff, Antti 
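At its core, the synthesis procedure above is gradient ascent on the generator's latent code to maximize a target neuron's activation. A dependency-free sketch with toy stand-ins for the generator and neuron (the callables, learning rate, and clipping range are illustrative assumptions; with real networks the gradient comes from backpropagation through both networks rather than finite differences):

```python
import numpy as np

def activation_maximization(score, z0, steps=200, lr=0.5, eps=1e-4):
    """Gradient ascent on a latent code z to maximize score(G(z)).

    `score` folds the generator and the classifier neuron into one
    callable; a numerical gradient keeps the sketch self-contained."""
    z = z0.copy()
    for _ in range(steps):
        g = np.zeros_like(z)
        for i in range(len(z)):          # central finite differences
            dz = np.zeros_like(z); dz[i] = eps
            g[i] = (score(z + dz) - score(z - dz)) / (2 * eps)
        z += lr * g
        z = np.clip(z, -3, 3)            # keep codes in a plausible range
    return z

# Hypothetical stand-ins for generator + classifier neuron.
rng = np.random.default_rng(0)
W, v = rng.standard_normal((32, 8)), rng.standard_normal(32)
score = lambda z: float(v @ np.tanh(W @ z))
z = activation_maximization(score, np.zeros(8))
print(score(np.zeros(8)), score(z))      # activation before vs after
```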
Rasmus, Mathias Berglund, Hotloo Xiranood, Harri Valpola, J\u00fcrgen Schmidhuber", "author": "Klaus Greff; Antti Rasmus; Mathias Berglund; Tele Hao; Harri Valpola; J\u00fcrgen Schmidhuber", "abstract": "We present a framework for efficient perceptual inference that explicitly reasons about the segmentation of its inputs and features. Rather than being trained for any specific segmentation, our framework learns the grouping process in an unsupervised manner or alongside any supervised task. We enable a neural network to group the representations of different objects in an iterative manner through a differentiable mechanism. We achieve very fast convergence by allowing the system to amortize the joint iterative inference of the groupings and their representations. In contrast to many other recently proposed methods for addressing multi-object scenes, our system does not assume the inputs to be images and can therefore directly handle other modalities. We evaluate our method on multi-digit classification of very cluttered images that require texture segmentation. Remarkably our method achieves improved classification performance over convolutional networks despite being fully connected, by making use of the grouping mechanism. Furthermore, we observe that our system greatly improves upon the semi-supervised result of a baseline Ladder network on our dataset. These results are evidence that grouping is a powerful tool that can help to improve sample efficiency.", "bibtex": "@inproceedings{NIPS2016_01eee509,\n author = {Greff, Klaus and Rasmus, Antti and Berglund, Mathias and Hao, Tele and Valpola, Harri and Schmidhuber, J\\\"{u}rgen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tagger: Deep Unsupervised Perceptual Grouping},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/01eee509ee2f68dc6014898c309e86bf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/01eee509ee2f68dc6014898c309e86bf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/01eee509ee2f68dc6014898c309e86bf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/01eee509ee2f68dc6014898c309e86bf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/01eee509ee2f68dc6014898c309e86bf-Reviews.html", "metareview": "", "pdf_size": 2639554, "gs_citation": 181, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7441786219123187075&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "IDSIA; The Curious AI Company; The Curious AI Company; The Curious AI Company; IDSIA; The Curious AI Company", "aff_domain": "idsia.ch;cai.fi;cai.fi;cai.fi;idsia.ch;cai.fi", "email": "idsia.ch;cai.fi;cai.fi;cai.fi;idsia.ch;cai.fi", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/01eee509ee2f68dc6014898c309e86bf-Abstract.html", "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "Institute of Digital Technologies;Curious AI Company", "aff_unique_dep": ";", "aff_unique_url": "https://www.idsia.ch;", "aff_unique_abbr": "IDSIA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "Switzerland;United States" }, { "title": "Temporal Regularized Matrix Factorization for High-dimensional Time Series Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6916", "id": "6916", "author_site": "Hsiang-Fu Yu, Nikhil Rao, Inderjit Dhillon", "author": "Hsiang-Fu Yu; Nikhil Rao; Inderjit S Dhillon", "abstract": "Time series prediction problems are becoming increasingly high-dimensional in modern applications, such as climatology and demand forecasting. For example, in the latter problem, the number of items for which demand needs to be forecast might be as large as 50,000. In addition, the data is generally noisy and full of missing values. Thus, modern applications require methods that are highly scalable, and can deal with noisy data in terms of corruptions or missing values. However, classical time series methods usually fall short of handling these issues. In this paper, we present a temporal regularized matrix factorization (TRMF) framework which supports data-driven temporal learning and forecasting. We develop novel regularization schemes and use scalable matrix factorization methods that are eminently suited for high-dimensional time series data that has many missing values. Our proposed TRMF is highly general, and subsumes many existing approaches for time series analysis. We make interesting connections to graph regularization methods in the context of learning the dependencies in an autoregressive framework. Experimental results show the superiority of TRMF in terms of scalability and prediction quality. 
In particular, TRMF is two orders of magnitude faster than other methods on a problem of dimension 50,000, and generates better forecasts on real-world datasets such as Wal-mart E-commerce datasets.", "bibtex": "@inproceedings{NIPS2016_85422afb,\n author = {Yu, Hsiang-Fu and Rao, Nikhil and Dhillon, Inderjit S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Temporal Regularized Matrix Factorization for High-dimensional Time Series Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/85422afb467e9456013a2a51d4dff702-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/85422afb467e9456013a2a51d4dff702-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/85422afb467e9456013a2a51d4dff702-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/85422afb467e9456013a2a51d4dff702-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/85422afb467e9456013a2a51d4dff702-Reviews.html", "metareview": "", "pdf_size": 555227, "gs_citation": 638, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10767037577593544706&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "University of Texas at Austin; Technicolor Research; University of Texas at Austin", "aff_domain": "cs.utexas.edu;gmail.com;cs.utexas.edu", "email": "cs.utexas.edu;gmail.com;cs.utexas.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/85422afb467e9456013a2a51d4dff702-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Texas at Austin;Technicolor", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.utexas.edu;https://www.technicolor.com/en", "aff_unique_abbr": "UT Austin;Technicolor", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;France" }, { "title": "Tensor Switching Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6914", "id": "6914", "author_site": "Chuan-Yung Tsai, Andrew M Saxe, David Cox", "author": "Chuan-Yung Tsai; Andrew M Saxe; Andrew M Saxe; David Cox", "abstract": "We present a novel neural network algorithm, the Tensor Switching (TS) network, which generalizes the Rectified Linear Unit (ReLU) nonlinearity to tensor-valued hidden units. The TS network copies its entire input vector to different locations in an expanded representation, with the location determined by its hidden unit activity. In this way, even a simple linear readout from the TS representation can implement a highly expressive deep-network-like function. The TS network hence avoids the vanishing gradient problem by construction, at the cost of larger representation size. We develop several methods to train the TS network, including equivalent kernels for infinitely wide and deep TS networks, a one-pass linear learning algorithm, and two backpropagation-inspired representation learning algorithms. 
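Read from the TRMF abstract above, the objective couples a masked factorization loss with an autoregressive penalty tying each latent time point to its lagged values. A minimal sketch of that objective (the lag set, weight parameterization, and constants are illustrative; the paper's exact regularizers, including the penalty on the AR weights and the graph-regularization view, differ in detail):

```python
import numpy as np

def trmf_loss(Y, mask, F, X, W, lags, lam_f=1.0, lam_x=1.0):
    """Sketch of a temporal regularized matrix factorization objective.

    Y (n, T) with boolean observation mask; F (n, k) item factors;
    X (k, T) latent time series; W (k, n_lags) per-dimension AR weights
    over the lag set `lags`."""
    fit = ((mask * (Y - F @ X)) ** 2).sum()
    L = max(lags)
    resid = X[:, L:].copy()              # x_t minus its AR prediction
    for w_col, l in zip(W.T, lags):
        resid -= w_col[:, None] * X[:, L - l:X.shape[1] - l]
    return fit + lam_f * (F ** 2).sum() + lam_x * (resid ** 2).sum()

rng = np.random.default_rng(0)
n, T, k, lags = 30, 100, 4, [1, 2]
Y = rng.standard_normal((n, T)); mask = rng.random((n, T)) < 0.8
F, X = rng.standard_normal((n, k)), rng.standard_normal((k, T))
W = rng.standard_normal((k, len(lags)))
print(trmf_loss(Y, mask, F, X, W, lags))
```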
Our experimental results demonstrate that the TS network is indeed more expressive and consistently learns faster than standard ReLU networks.", "bibtex": "@inproceedings{NIPS2016_b1563a78,\n author = {Tsai, Chuan-Yung and Saxe, Andrew M and Saxe, Andrew M and Cox, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tensor Switching Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b1563a78ec59337587f6ab6397699afc-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b1563a78ec59337587f6ab6397699afc-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b1563a78ec59337587f6ab6397699afc-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b1563a78ec59337587f6ab6397699afc-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b1563a78ec59337587f6ab6397699afc-Reviews.html", "metareview": "", "pdf_size": 748443, "gs_citation": 12, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=821792041624457626&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": ";;;", "aff_domain": ";;;", "email": ";;;", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b1563a78ec59337587f6ab6397699afc-Abstract.html" }, { "title": "Testing for Differences in Gaussian Graphical Models: Applications to Brain Connectivity", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7209", "id": "7209", "author_site": "Eugene Belilovsky, Ga\u00ebl Varoquaux, Matthew Blaschko", "author": "Eugene Belilovsky; Ga\u00ebl Varoquaux; Matthew B Blaschko", "abstract": "Functional brain networks are well described and estimated from data with Gaussian Graphical Models (GGMs), e.g.\\ using sparse inverse covariance estimators. Comparing functional connectivity of subjects in two populations calls for comparing these estimated GGMs. Our goal is to identify differences in GGMs known to have similar structure. We characterize the uncertainty of differences with confidence intervals obtained using a parametric distribution on parameters of a sparse estimator. Sparse penalties enable statistical guarantees and interpretable models even in high-dimensional and low-sample settings. Characterizing the distributions of sparse models is inherently challenging as the penalties produce a biased estimator. Recent work invokes the sparsity assumptions to effectively remove the bias from a sparse estimator such as the lasso. These distributions can be used to give confidence intervals on edges in GGMs, and by extension their differences. However, in the case of comparing GGMs, these estimators do not make use of any assumed joint structure among the GGMs. Inspired by priors from brain functional connectivity we derive the distribution of parameter differences under a joint penalty when parameters are known to be sparse in the difference. This leads us to introduce the debiased multi-task fused lasso, whose distribution can be characterized in an efficient manner. We then show how the debiased lasso and multi-task fused lasso can be used to obtain confidence intervals on edge differences in GGMs. 
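The core Tensor Switching construction above, copying the whole input into the slot of every active hidden unit, fits in a few lines. A minimal single-layer sketch (note that the ordinary ReLU activation of unit i is recoverable as W[i] @ Z[i], which is what makes a linear readout on Z strictly more expressive):

```python
import numpy as np

def ts_representation(x, W):
    """Tensor Switching expansion of one layer.

    h = 1[Wx > 0] is the binary ReLU gating pattern; row i of the
    returned (hidden, input) tensor equals x when unit i is active and
    zero otherwise, so a linear readout can apply a different effective
    filter for every activation pattern."""
    h = (W @ x > 0).astype(x.dtype)
    return np.outer(h, x)

rng = np.random.default_rng(0)
W = rng.standard_normal((16, 8))
x = rng.standard_normal(8)
Z = ts_representation(x, W)
readout = rng.standard_normal(Z.size)
print(readout @ Z.ravel())               # a simple linear readout
print(np.allclose(np.einsum('ij,ij->i', W, Z), np.maximum(W @ x, 0)))
```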
We validate the proposed techniques on a set of synthetic examples as well as a neuro-imaging dataset created for the study of autism.", "bibtex": "@inproceedings{NIPS2016_f9b902fc,\n author = {Belilovsky, Eugene and Varoquaux, Ga\\\"{e}l and Blaschko, Matthew B},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Testing for Differences in Gaussian Graphical Models: Applications to Brain Connectivity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f9b902fc3289af4dd08de5d1de54f68f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f9b902fc3289af4dd08de5d1de54f68f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f9b902fc3289af4dd08de5d1de54f68f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f9b902fc3289af4dd08de5d1de54f68f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f9b902fc3289af4dd08de5d1de54f68f-Reviews.html", "metareview": "", "pdf_size": 3290081, "gs_citation": 84, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15231773652029746191&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "University of Paris-Saclay+INRIA+KU Leuven; INRIA; KU Leuven", "aff_domain": "inria.fr;inria.fr;esat.kuleuven.be", "email": "inria.fr;inria.fr;esat.kuleuven.be", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f9b902fc3289af4dd08de5d1de54f68f-Abstract.html", "aff_unique_index": "0+1+2;1;2", "aff_unique_norm": "University of Paris-Saclay;INRIA;Katholieke Universiteit Leuven", "aff_unique_dep": ";;", "aff_unique_url": "https://www.universite-paris-saclay.fr;https://www.inria.fr;https://www.kuleuven.be", "aff_unique_abbr": "Paris-Saclay;INRIA;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0+0+1;0;1", "aff_country_unique": "France;Belgium" }, { "title": "The Forget-me-not Process", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7256", "id": "7256", "author_site": "Kieran Milan, Joel Veness, James Kirkpatrick, Michael Bowling, Anna Koop, Demis Hassabis", "author": "Kieran Milan; Joel Veness; James Kirkpatrick; Michael Bowling; Anna Koop; Demis Hassabis", "abstract": "We introduce the Forget-me-not Process, an efficient, non-parametric meta-algorithm for online probabilistic sequence prediction for piecewise stationary, repeating sources. Our method works by taking a Bayesian approach to partition a stream of data into postulated task-specific segments, while simultaneously building a model for each task. We provide regret guarantees with respect to piecewise stationary data sources under the logarithmic loss, and validate the method empirically across a range of sequence prediction and task identification problems.", "bibtex": "@inproceedings{NIPS2016_f26dab9b,\n author = {Milan, Kieran and Veness, Joel and Kirkpatrick, James and Bowling, Michael and Koop, Anna and Hassabis, Demis},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
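The confidence intervals in the GGM-testing abstract above rest on a one-step debiasing of a sparse estimate. A generic sketch of that primitive for the plain lasso (the paper's contribution is the analogous debiased multi-task fused lasso for GGM differences; in the literature the approximate inverse M comes from node-wise lasso, and the ridge inverse below is only a shortcut to keep the sketch brief):

```python
import numpy as np
from sklearn.linear_model import Lasso

def debiased_lasso(X, y, alpha):
    """One-step debiasing: beta_d = beta + M X^T (y - X beta) / n.

    The correction removes (most of) the shrinkage bias of the l1
    penalty, so beta_d admits Gaussian confidence intervals."""
    n, p = X.shape
    beta = Lasso(alpha=alpha).fit(X, y).coef_
    Sigma = X.T @ X / n
    M = np.linalg.inv(Sigma + 1e-2 * np.eye(p))   # crude surrogate inverse
    return beta + M @ X.T @ (y - X @ beta) / n

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 10))
truth = np.array([1.5, 0.0, 0.0, -1.0] + [0.0] * 6)
y = X @ truth + 0.5 * rng.standard_normal(200)
print(np.round(debiased_lasso(X, y, alpha=0.1), 2))
```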
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Forget-me-not Process},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f26dab9bf6a137c3b6782e562794c2f2-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f26dab9bf6a137c3b6782e562794c2f2-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f26dab9bf6a137c3b6782e562794c2f2-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f26dab9bf6a137c3b6782e562794c2f2-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f26dab9bf6a137c3b6782e562794c2f2-Reviews.html", "metareview": "", "pdf_size": 294338, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3602566794939272403&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 5, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; University of Alberta; University of Alberta", "aff_domain": "google.com;google.com;google.com;google.com;cs.ualberta.ca;cs.ualberta.ca", "email": "google.com;google.com;google.com;google.com;cs.ualberta.ca;cs.ualberta.ca", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f26dab9bf6a137c3b6782e562794c2f2-Abstract.html", "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "Google;University of Alberta", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.ualberta.ca", "aff_unique_abbr": "DeepMind;UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "United Kingdom;Canada" }, { "title": "The Generalized Reparameterization Gradient", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7182", "id": "7182", "author_site": "Francisco Ruiz, Michalis Titsias, David Blei", "author": "Francisco R Ruiz; Michalis Titsias RC AUEB; David Blei", "abstract": "The reparameterization gradient has become a widely used method to obtain Monte Carlo gradients to optimize the variational objective. However, this technique does not easily apply to commonly used distributions such as beta or gamma without further approximations, and most practical applications of the reparameterization gradient fit Gaussian distributions. In this paper, we introduce the generalized reparameterization gradient, a method that extends the reparameterization gradient to a wider class of variational distributions. Generalized reparameterizations use invertible transformations of the latent variables which lead to transformed distributions that weakly depend on the variational parameters. This results in new Monte Carlo gradients that combine reparameterization gradients and score function gradients. We demonstrate our approach on variational inference for two complex probabilistic models. The generalized reparameterization is effective: even a single sample from the variational distribution is enough to obtain a low-variance gradient.", "bibtex": "@inproceedings{NIPS2016_f718499c,\n author = {Ruiz, Francisco R and Titsias RC AUEB, Michalis and Blei, David},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
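The two gradient estimators that the generalized reparameterization gradient combines can be contrasted on a Gaussian toy case, where the reparameterization is exact and the correction term vanishes. A minimal illustration of those two ingredients (not the G-REP estimator itself, which adds a score-like correction when the transformed distribution retains a weak dependence on the variational parameters):

```python
import numpy as np

rng = np.random.default_rng(0)
mu, sigma, S = 1.0, 0.5, 200_000
f = lambda z: z ** 2                 # toy integrand: E_q[f(z)] = mu^2 + sigma^2

# Score-function estimator of d/dmu E_q[f(z)]: average f(z) * dlog q(z)/dmu.
z = mu + sigma * rng.standard_normal(S)
g_score = np.mean(f(z) * (z - mu) / sigma ** 2)

# Reparameterization estimator: write z = mu + sigma * eps and
# differentiate through the sample path, giving E[f'(z)] with f'(z) = 2z.
eps = rng.standard_normal(S)
g_rep = np.mean(2 * (mu + sigma * eps))

print(g_score, g_rep)                # both approach d/dmu (mu^2 + sigma^2) = 2
```

Run side by side, the score-function estimate is noticeably noisier at the same sample size, which is the variance gap the generalized reparameterization is designed to close for distributions like the gamma and beta.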
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Generalized Reparameterization Gradient},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f718499c1c8cef6730f9fd03c8125cab-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f718499c1c8cef6730f9fd03c8125cab-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f718499c1c8cef6730f9fd03c8125cab-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f718499c1c8cef6730f9fd03c8125cab-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f718499c1c8cef6730f9fd03c8125cab-Reviews.html", "metareview": "", "pdf_size": 495624, "gs_citation": 209, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17643362576709176335&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "University of Cambridge + Columbia University; Athens University of Economics and Business; Columbia University", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f718499c1c8cef6730f9fd03c8125cab-Abstract.html", "aff_unique_index": "0+1;2;1", "aff_unique_norm": "University of Cambridge;Columbia University;Athens University of Economics and Business", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cam.ac.uk;https://www.columbia.edu;https://www.aueb.gr", "aff_unique_abbr": "Cambridge;Columbia;AUEB", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Athens", "aff_country_unique_index": "0+1;2;1", "aff_country_unique": "United Kingdom;United States;Greece" }, { "title": "The Limits of Learning with Missing Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6945", "id": "6945", "author_site": "Brian Bullins, Elad Hazan, Tomer Koren", "author": "Brian Bullins; Elad Hazan; Tomer Koren", "abstract": "We study regression and classification in a setting where the learning algorithm is allowed to access only a limited number of attributes per example, known as the limited attribute observation model. In this well-studied model, we provide the first lower bounds giving a limit on the precision attainable by any algorithm for several variants of regression, notably linear regression with the absolute loss and the squared loss, as well as for classification with the hinge loss. We complement these lower bounds with a general purpose algorithm that gives an upper bound on the achievable precision limit in the setting of learning with missing data.", "bibtex": "@inproceedings{NIPS2016_955a1584,\n author = {Bullins, Brian and Hazan, Elad and Koren, Tomer},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Limits of Learning with Missing Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/955a1584af63a546588caae4d23840b3-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/955a1584af63a546588caae4d23840b3-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/955a1584af63a546588caae4d23840b3-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/955a1584af63a546588caae4d23840b3-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/955a1584af63a546588caae4d23840b3-Reviews.html", "metareview": "", "pdf_size": 242639, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1958226645322540483&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Princeton University; Princeton University; Google Brain", "aff_domain": "cs.princeton.edu;cs.princeton.edu;google.com", "email": "cs.princeton.edu;cs.princeton.edu;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/955a1584af63a546588caae4d23840b3-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.princeton.edu;https://brain.google.com", "aff_unique_abbr": "Princeton;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Multi-fidelity Multi-armed Bandit", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7034", "id": "7034", "author_site": "Kirthevasan Kandasamy, Gautam Dasarathy, Barnabas Poczos, Jeff Schneider", "author": "Kirthevasan Kandasamy; Gautam Dasarathy; Barnabas Poczos; Jeff Schneider", "abstract": "We study a variant of the classical stochastic $K$-armed bandit where observing the outcome of each arm is expensive, but cheap approximations to this outcome are available. For example, in online advertising the performance of an ad can be approximated by displaying it for shorter time periods or to narrower audiences. We formalise this task as a \\emph{multi-fidelity} bandit, where, at each time step, the forecaster may choose to play an arm at any one of $M$ fidelities. The highest fidelity (desired outcome) expends cost $\\lambda^{(M)}$. The $m$th fidelity (an approximation) expends $\\lambda^{(m)} < \\lambda^{(M)}$ and returns a biased estimate of the highest fidelity. We develop MF-UCB, a novel upper confidence bound procedure for this setting and prove that it naturally adapts to the sequence of available approximations and costs, thus attaining better regret than naive strategies which ignore the approximations. For instance, in the above online advertising example, MF-UCB would use the lower fidelities to quickly eliminate suboptimal ads and reserve the larger expensive experiments on a small set of promising candidates. We complement this result with a lower bound and show that MF-UCB is nearly optimal under certain conditions.", "bibtex": "@inproceedings{NIPS2016_2ba59664,\n author = {Kandasamy, Kirthevasan and Dasarathy, Gautam and Poczos, Barnabas and Schneider, Jeff},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
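A simplified version of the fidelity-selection logic sketched in the multi-fidelity abstract above: keep per-fidelity statistics for every arm, take each arm's tightest upper bound across fidelities after adding a bias allowance, and spend cheap pulls only while their confidence width is still informative. This is an illustrative variant under assumed bias bounds, not the paper's exact MF-UCB rule or its regret analysis:

```python
import numpy as np

def mf_ucb_sketch(mu, zeta, costs, budget, rng):
    """mu[k, m]: true mean of arm k at fidelity m (last m is the target);
    zeta[m]: assumed bias bound of fidelity m; costs[m]: its cost."""
    K, M = mu.shape
    n = np.ones((K, M))
    s = mu + rng.standard_normal((K, M))        # one noisy pull everywhere
    spent, t = float((n * costs).sum()), K * M
    while spent < budget:
        t += 1
        conf = np.sqrt(2.0 * np.log(t) / n)
        ucb = (s / n + conf + zeta).min(axis=1)  # tightest bound per arm
        k = int(np.argmax(ucb))
        # Cheapest fidelity still wider than its bias allowance; the top
        # fidelity (zeta = 0) is always eligible.
        m = int(np.argmax(conf[k] > zeta))
        s[k, m] += mu[k, m] + rng.standard_normal()
        n[k, m] += 1
        spent += costs[m]
    return int(np.argmax(s[:, -1] / n[:, -1]))   # best arm at top fidelity

rng = np.random.default_rng(0)
mu = np.array([[0.35, 0.4], [0.5, 0.55], [0.65, 0.75]])  # fidelity 0 biased
print(mf_ucb_sketch(mu, np.array([0.2, 0.0]), np.array([1.0, 10.0]), 400.0, rng))
```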
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Multi-fidelity Multi-armed Bandit},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2ba596643cbbbc20318224181fa46b28-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2ba596643cbbbc20318224181fa46b28-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2ba596643cbbbc20318224181fa46b28-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2ba596643cbbbc20318224181fa46b28-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2ba596643cbbbc20318224181fa46b28-Reviews.html", "metareview": "", "pdf_size": 676148, "gs_citation": 46, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6228867623958866277&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Carnegie Mellon University; Rice University; Carnegie Mellon University; Carnegie Mellon University", "aff_domain": "cs.cmu.edu;rice.edu;cs.cmu.edu;cs.cmu.edu", "email": "cs.cmu.edu;rice.edu;cs.cmu.edu;cs.cmu.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2ba596643cbbbc20318224181fa46b28-Abstract.html", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Rice University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.rice.edu", "aff_unique_abbr": "CMU;Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "The Multiple Quantile Graphical Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7230", "id": "7230", "author_site": "Alnur Ali, J. Zico Kolter, Ryan Tibshirani", "author": "Alnur Ali; J. Zico Kolter; Ryan J Tibshirani", "abstract": "We introduce the Multiple Quantile Graphical Model (MQGM), which extends the neighborhood selection approach of Meinshausen and B\u00fchlmann for learning sparse graphical models. The latter is defined by the basic subproblem of modeling the conditional mean of one variable as a sparse function of all others. Our approach models a set of conditional quantiles of one variable as a sparse function of all others, and hence offers a much richer, more expressive class of conditional distribution estimates. We establish that, under suitable regularity conditions, the MQGM identifies the exact conditional independencies with probability tending to one as the problem size grows, even outside of the usual homoskedastic Gaussian data model. We develop an efficient algorithm for fitting the MQGM using the alternating direction method of multipliers. We also describe a strategy for sampling from the joint distribution that underlies the MQGM estimate. Lastly, we present detailed experiments that demonstrate the flexibility and effectiveness of the MQGM in modeling heteroskedastic non-Gaussian data.", "bibtex": "@inproceedings{NIPS2016_537de305,\n author = {Ali, Alnur and Kolter, J. Zico and Tibshirani, Ryan J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
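The basic subproblem behind the MQGM, modeling one conditional quantile of a variable as a sparse function of the others, comes down to an l1-penalized pinball loss. A minimal one-neighborhood, one-quantile sketch using plain subgradient descent (the paper fits many quantiles jointly, with non-crossing structure, via ADMM):

```python
import numpy as np

def pinball_loss(r, tau):
    """Quantile ('pinball') loss on residuals r for quantile level tau."""
    return np.mean(np.maximum(tau * r, (tau - 1) * r))

# Fit the tau-quantile of y given X with an l1 penalty.
rng = np.random.default_rng(0)
n, p, tau, lam = 500, 10, 0.9, 0.05
X = rng.standard_normal((n, p))
# Heteroskedastic data: the noise scale depends on X[:, 1].
y = X[:, 0] + 0.5 * rng.standard_normal(n) * (1 + X[:, 1] ** 2)
beta = np.zeros(p)
for _ in range(2000):
    r = y - X @ beta
    # Subgradient of the pinball loss plus the l1 penalty.
    grad = -X.T @ (tau - (r < 0)) / n + lam * np.sign(beta)
    beta -= 0.1 * grad
print(pinball_loss(y - X @ beta, tau), np.round(beta, 2))
```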
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Multiple Quantile Graphical Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/537de305e941fccdbba5627e3eefbb24-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/537de305e941fccdbba5627e3eefbb24-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/537de305e941fccdbba5627e3eefbb24-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/537de305e941fccdbba5627e3eefbb24-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/537de305e941fccdbba5627e3eefbb24-Reviews.html", "metareview": "", "pdf_size": 1064439, "gs_citation": 30, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11446486775654036707&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Machine Learning Department, Carnegie Mellon University; Computer Science Department, Carnegie Mellon University; Department of Statistics, Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu", "email": "cmu.edu;cs.cmu.edu;cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/537de305e941fccdbba5627e3eefbb24-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Machine Learning Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Multiscale Laplacian Graph Kernel", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7417", "id": "7417", "author_site": "Risi Kondor, Horace Pan", "author": "Risi Kondor; Horace Pan", "abstract": "Many real world graphs, such as the graphs of molecules, exhibit structure at multiple different scales, but most existing kernels between graphs are either purely local or purely global in character. In contrast, by building a hierarchy of nested subgraphs, the Multiscale Laplacian Graph kernels (MLG kernels) that we define in this paper can account for structure at a range of different scales. At the heart of the MLG construction is another new graph kernel, called the Feature Space Laplacian Graph kernel (FLG kernel), which has the property that it can lift a base kernel defined on the vertices of two graphs to a kernel between the graphs. The MLG kernel applies such FLG kernels to subgraphs recursively. To make the MLG kernel computationally feasible, we also introduce a randomized projection procedure, similar to the Nystr\u00f6m method, but for RKHS operators.", "bibtex": "@inproceedings{NIPS2016_6d3a1e06,\n author = {Kondor, Risi and Pan, Horace},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Multiscale Laplacian Graph Kernel},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/6d3a1e06d6a06349436bc054313b648c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/6d3a1e06d6a06349436bc054313b648c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/6d3a1e06d6a06349436bc054313b648c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/6d3a1e06d6a06349436bc054313b648c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/6d3a1e06d6a06349436bc054313b648c-Reviews.html", "metareview": "", "pdf_size": 341588, "gs_citation": 223, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16878982718407030522&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Computer Science + Department of Statistics, University of Chicago; Department of Computer Science, University of Chicago", "aff_domain": "cs.uchicago.edu;uchicago.edu", "email": "cs.uchicago.edu;uchicago.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/6d3a1e06d6a06349436bc054313b648c-Abstract.html", "aff_unique_index": "0+1;1", "aff_unique_norm": "Unknown Institution;University of Chicago", "aff_unique_dep": "Department of Computer Science;Department of Statistics", "aff_unique_url": ";https://www.uchicago.edu", "aff_unique_abbr": ";UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "title": "The Parallel Knowledge Gradient Method for Batch Bayesian Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7179", "id": "7179", "author_site": "Jian Wu, Peter Frazier", "author": "Jian Wu; Peter Frazier", "abstract": "In many applications of black-box optimization, one can evaluate multiple points simultaneously, e.g. when evaluating the performances of several different neural network architectures in a parallel computing environment. In this paper, we develop a novel batch Bayesian optimization algorithm --- the parallel knowledge gradient method. By construction, this method provides the one-step Bayes optimal batch of points to sample. We provide an efficient strategy for computing this Bayes-optimal batch of points, and we demonstrate that the parallel knowledge gradient method finds global optima significantly faster than previous batch Bayesian optimization algorithms on both synthetic test functions and when tuning hyperparameters of practical machine learning algorithms, especially when function evaluations are noisy.", "bibtex": "@inproceedings{NIPS2016_18d10dc6,\n author = {Wu, Jian and Frazier, Peter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Parallel Knowledge Gradient Method for Batch Bayesian Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/18d10dc6e666eab6de9215ae5b3d54df-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/18d10dc6e666eab6de9215ae5b3d54df-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/18d10dc6e666eab6de9215ae5b3d54df-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/18d10dc6e666eab6de9215ae5b3d54df-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/18d10dc6e666eab6de9215ae5b3d54df-Reviews.html", "metareview": "", "pdf_size": 897605, "gs_citation": 309, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11024096056363095694&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": ";", "aff_domain": ";", "email": ";", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/18d10dc6e666eab6de9215ae5b3d54df-Abstract.html" }, { "title": "The Power of Adaptivity in Identifying Statistical Alternatives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7284", "id": "7284", "author_site": "Kevin Jamieson, Daniel Haas, Benjamin Recht", "author": "Kevin G. Jamieson; Daniel Haas; Benjamin Recht", "abstract": "This paper studies the trade-off between two different kinds of pure exploration: breadth versus depth. We focus on the most biased coin problem, asking how many total coin flips are required to identify a ``heavy'' coin from an infinite bag containing both ``heavy'' coins with mean $\\theta_1 \\in (0,1)$, and ``light\" coins with mean $\\theta_0 \\in (0,\\theta_1)$, where heavy coins are drawn from the bag with proportion $\\alpha \\in (0,1/2)$. When $\\alpha,\\theta_0,\\theta_1$ are unknown, the key difficulty of this problem lies in distinguishing whether the two kinds of coins have very similar means, or whether heavy coins are just extremely rare. While existing solutions to this problem require some prior knowledge of the parameters $\\theta_0,\\theta_1,\\alpha$, we propose an adaptive algorithm that requires no such knowledge yet still obtains near-optimal sample complexity guarantees. In contrast, we provide a lower bound showing that non-adaptive strategies require at least quadratically more samples. In characterizing this gap between adaptive and nonadaptive strategies, we make connections to anomaly detection and prove lower bounds on the sample complexity of differentiating between a single parametric distribution and a mixture of two such distributions.", "bibtex": "@inproceedings{NIPS2016_98b29795,\n author = {Jamieson, Kevin G and Haas, Daniel and Recht, Benjamin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Power of Adaptivity in Identifying Statistical Alternatives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/98b297950041a42470269d56260243a1-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/98b297950041a42470269d56260243a1-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/98b297950041a42470269d56260243a1-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/98b297950041a42470269d56260243a1-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/98b297950041a42470269d56260243a1-Reviews.html", "metareview": "", "pdf_size": 464736, "gs_citation": 29, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16197756952736760081&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/98b297950041a42470269d56260243a1-Abstract.html" }, { "title": "The Power of Optimization from Samples", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7052", "id": "7052", "author_site": "Eric Balkanski, Aviad Rubinstein, Yaron Singer", "author": "Eric Balkanski; Aviad Rubinstein; Yaron Singer", "abstract": "We consider the problem of optimization from samples of monotone submodular functions with bounded curvature. In numerous applications, the function optimized is not known a priori, but instead learned from data. What are the guarantees we have when optimizing functions from sampled data? In this paper we show that for any monotone submodular function with curvature c there is a (1 - c)/(1 + c - c^2) approximation algorithm for maximization under cardinality constraints when polynomially-many samples are drawn from the uniform distribution over feasible sets. Moreover, we show that this algorithm is optimal. That is, for any c < 1, there exists a submodular function with curvature c for which no algorithm can achieve a better approximation. The curvature assumption is crucial as for general monotone submodular functions no algorithm can obtain a constant-factor approximation for maximization under a cardinality constraint when observing polynomially-many samples drawn from any distribution over feasible sets, even when the function is statistically learnable.", "bibtex": "@inproceedings{NIPS2016_c8758b51,\n author = {Balkanski, Eric and Rubinstein, Aviad and Singer, Yaron},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Power of Optimization from Samples},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c8758b517083196f05ac29810b924aca-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c8758b517083196f05ac29810b924aca-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c8758b517083196f05ac29810b924aca-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c8758b517083196f05ac29810b924aca-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c8758b517083196f05ac29810b924aca-Reviews.html", "metareview": "", "pdf_size": 499640, "gs_citation": 57, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14352484479423080362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Harvard University; University of California, Berkeley; Harvard University", "aff_domain": "g.harvard.edu;eecs.berkeley.edu;seas.harvard.edu", "email": "g.harvard.edu;eecs.berkeley.edu;seas.harvard.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c8758b517083196f05ac29810b924aca-Abstract.html", "aff_unique_index": "0;1;0", "aff_unique_norm": "Harvard University;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.harvard.edu;https://www.berkeley.edu", "aff_unique_abbr": "Harvard;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The Product Cut", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7105", "id": "7105", "author_site": "Thomas Laurent, James von Brecht, Xavier Bresson, arthur szlam", "author": "Thomas Laurent; James von Brecht; Xavier Bresson; arthur szlam", "abstract": "We introduce a theoretical and algorithmic framework for multi-way graph partitioning that relies on a multiplicative cut-based objective. We refer to this objective as the Product Cut. We provide a detailed investigation of the mathematical properties of this objective and an effective algorithm for its optimization. The proposed model has strong mathematical underpinnings, and the corresponding algorithm achieves state-of-the-art performance on benchmark data sets.", "bibtex": "@inproceedings{NIPS2016_ca460332,\n author = {Laurent, Thomas and von Brecht, James and Bresson, Xavier and szlam, arthur},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Product Cut},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ca460332316d6da84b08b9bcf39b687b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ca460332316d6da84b08b9bcf39b687b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ca460332316d6da84b08b9bcf39b687b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ca460332316d6da84b08b9bcf39b687b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ca460332316d6da84b08b9bcf39b687b-Reviews.html", "metareview": "", "pdf_size": 2320530, "gs_citation": -1, "gs_cited_by_link": "", "gs_version_total": -1, "aff": "Nanyang Technological University; Loyola Marymount University; Facebook AI Research; California State University, Long Beach", "aff_domain": "ntu.edu.sg;lmu.edu;fb.com;csulb.edu", "email": "ntu.edu.sg;lmu.edu;fb.com;csulb.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ca460332316d6da84b08b9bcf39b687b-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Nanyang Technological University;Loyola Marymount University;Meta;California State University, Long Beach", "aff_unique_dep": ";;Facebook AI Research;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.lmu.edu;https://research.facebook.com;https://www.csulb.edu", "aff_unique_abbr": "NTU;LMU;FAIR;CSULB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Long Beach", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;United States" }, { "title": "The Robustness of Estimator Composition", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7258", "id": "7258", "author_site": "Pingfan Tang, Jeff M Phillips", "author": "Pingfan Tang; Jeff M Phillips", "abstract": "We formalize notions of robustness for composite estimators via the notion of a breakdown point. A composite estimator successively applies two (or more) estimators: on data decomposed into disjoint parts, it applies the first estimator on each part, then the second estimator on the outputs of the first estimator. And so on, if the composition is of more than two estimators. Informally, the breakdown point is the minimum fraction of data points which if significantly modified will also significantly modify the output of the estimator, so it is typically desirable to have a large breakdown point. Our main result shows that, under mild conditions on the individual estimators, the breakdown point of the composite estimator is the product of the breakdown points of the individual estimators. We also demonstrate several scenarios, ranging from regression to statistical testing, where this analysis is easy to apply, useful in understanding worst-case robustness, and offers powerful insights into the associated data analysis.", "bibtex": "@inproceedings{NIPS2016_dd458505,\n author = {Tang, Pingfan and Phillips, Jeff M},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
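The product rule above is easy to probe numerically. A sketch under our own choices (median-of-medians: each median has asymptotic breakdown point 1/2, so the composition should tolerate just under 1/2 · 1/2 = 1/4 adversarial corruption):

```python
import numpy as np

def median_of_medians(x, parts=10):
    return np.median([np.median(b) for b in np.array_split(x, parts)])

def worst_case_corrupt(x, parts, frac):
    """Spend the corruption budget poisoning just over half of whole blocks."""
    blocks = np.array_split(x.copy(), parts)
    budget = int(frac * len(x))
    for b in blocks:
        need = len(b) // 2 + 1          # just over half flips this block's median
        if budget < need:
            break
        b[:need] = 1e9
        budget -= need
    return np.concatenate(blocks)

rng = np.random.default_rng(0)
x = rng.normal(size=10_000)
for frac in (0.20, 0.26):               # below vs. just above the predicted 1/4
    print(frac, median_of_medians(worst_case_corrupt(x, 10, frac)))
# stays ~0.0 at 20% corruption; blows up once corruption crosses ~25%
```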
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Robustness of Estimator Composition},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/dd458505749b2941217ddd59394240e8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/dd458505749b2941217ddd59394240e8-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/dd458505749b2941217ddd59394240e8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/dd458505749b2941217ddd59394240e8-Reviews.html", "metareview": "", "pdf_size": 325617, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4963395435452135263&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "School of Computing, University of Utah; School of Computing, University of Utah", "aff_domain": "cs.utah.edu;cs.utah.edu", "email": "cs.utah.edu;cs.utah.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/dd458505749b2941217ddd59394240e8-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Utah", "aff_unique_dep": "School of Computing", "aff_unique_url": "https://www.utah.edu", "aff_unique_abbr": "U of U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Utah", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "The Sound of APALM Clapping: Faster Nonsmooth Nonconvex Optimization with Stochastic Asynchronous PALM", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7279", "id": "7279", "author_site": "Damek Davis, Brent Edmunds, Madeleine Udell", "author": "Damek Davis; Brent Edmunds; Madeleine Udell", "abstract": "We introduce the Stochastic Asynchronous Proximal Alternating Linearized Minimization (SAPALM) method, a block coordinate stochastic proximal-gradient method for solving nonconvex, nonsmooth optimization problems. SAPALM is the first asynchronous parallel optimization method that provably converges on a large class of nonconvex, nonsmooth problems. We prove that SAPALM matches the best known rates of convergence --- among synchronous or asynchronous methods --- on this problem class. We provide upper bounds on the number of workers for which we can expect to see a linear speedup, which match the best bounds known for less complex problems, and show that in practice SAPALM achieves this linear speedup. We demonstrate state-of-the-art performance on several matrix factorization problems.", "bibtex": "@inproceedings{NIPS2016_2a79ea27,\n author = {Davis, Damek and Edmunds, Brent and Udell, Madeleine},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
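For context, the (synchronous) PALM step that SAPALM parallelizes is short enough to sketch. Below is our rendering of one standard prox-linear sweep for nonnegative matrix factorization, not the authors' asynchronous implementation:

```python
import numpy as np

def palm_nmf_step(A, U, V):
    """One PALM sweep for min 0.5 * ||A - U V||_F^2  s.t.  U >= 0, V >= 0."""
    Lu = np.linalg.norm(V @ V.T, 2) + 1e-12            # Lipschitz constant of grad_U
    U = np.maximum(U - ((U @ V - A) @ V.T) / Lu, 0.0)  # prox of the >= 0 indicator
    Lv = np.linalg.norm(U.T @ U, 2) + 1e-12            # Lipschitz constant of grad_V
    V = np.maximum(V - (U.T @ (U @ V - A)) / Lv, 0.0)
    return U, V
```

SAPALM's twist, per the abstract, is to let many workers apply such block updates asynchronously, with stochastic gradients and possibly stale reads.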
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The Sound of APALM Clapping: Faster Nonsmooth Nonconvex Optimization with Stochastic Asynchronous PALM},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2a79ea27c279e471f4d180b08d62b00a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2a79ea27c279e471f4d180b08d62b00a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/2a79ea27c279e471f4d180b08d62b00a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2a79ea27c279e471f4d180b08d62b00a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2a79ea27c279e471f4d180b08d62b00a-Reviews.html", "metareview": "", "pdf_size": 293003, "gs_citation": 51, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2889345856248327636&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Cornell University; Cornell University; University of California, Los Angeles", "aff_domain": "cornell.edu;cornell.edu;math.ucla.edu", "email": "cornell.edu;cornell.edu;math.ucla.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2a79ea27c279e471f4d180b08d62b00a-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "Cornell University;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.ucla.edu", "aff_unique_abbr": "Cornell;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "The non-convex Burer-Monteiro approach works on smooth semidefinite programs", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6966", "id": "6966", "author_site": "Nicolas Boumal, Vlad Voroninski, Afonso Bandeira", "author": "Nicolas Boumal; Vlad Voroninski; Afonso Bandeira", "abstract": "Semidefinite programs (SDP's) can be solved in polynomial time by interior point methods, but scalability can be an issue. To address this shortcoming, over a decade ago, Burer and Monteiro proposed to solve SDP's with few equality constraints via rank-restricted, non-convex surrogates. Remarkably, for some applications, local optimization methods seem to converge to global optima of these non-convex surrogates reliably. Although some theory supports this empirical success, a complete explanation of it remains an open question. In this paper, we consider a class of SDP's which includes applications such as max-cut, community detection in the stochastic block model, robust PCA, phase retrieval and synchronization of rotations. We show that the low-rank Burer-Monteiro formulation of SDP's in that class almost never has any spurious local optima.", "bibtex": "@inproceedings{NIPS2016_3de2334a,\n author = {Boumal, Nicolas and Voroninski, Vlad and Bandeira, Afonso},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
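The rank-restricted surrogate described above is concrete for max-cut: replace the PSD matrix $Y$ by $XX^T$ with unit-norm rows of $X$ and locally optimize. A hedged toy sketch (ours; the step size, rank, and rounding choices are arbitrary):

```python
import numpy as np

def burer_monteiro_maxcut(W, p=5, iters=500, eta=0.05, rng=None):
    """Maximize <L, X X^T>/4 over unit rows, then round; W = symmetric weights."""
    rng = np.random.default_rng(0) if rng is None else rng
    L = np.diag(W.sum(1)) - W                            # graph Laplacian
    X = rng.normal(size=(W.shape[0], p))
    X /= np.linalg.norm(X, axis=1, keepdims=True)
    for _ in range(iters):
        X += eta * (L @ X)                               # gradient ascent step
        X /= np.linalg.norm(X, axis=1, keepdims=True)    # project rows to the sphere
    cut = np.sign(X @ rng.normal(size=p))                # random-hyperplane rounding
    return cut, 0.25 * cut @ L @ cut                     # assignment, cut value
```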
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {The non-convex Burer-Monteiro approach works on smooth semidefinite programs},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3de2334a314a7a72721f1f74a6cb4cee-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3de2334a314a7a72721f1f74a6cb4cee-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3de2334a314a7a72721f1f74a6cb4cee-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3de2334a314a7a72721f1f74a6cb4cee-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3de2334a314a7a72721f1f74a6cb4cee-Reviews.html", "metareview": "", "pdf_size": 309848, "gs_citation": 317, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16032063470049481750&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 14, "aff": "Department of Mathematics, Princeton University; Department of Mathematics, Massachusetts Institute of Technology; Department of Mathematics and Center for Data Science, Courant Institute of Mathematical Sciences, New York University", "aff_domain": "math.princeton.edu;math.mit.edu;cims.nyu.edu", "email": "math.princeton.edu;math.mit.edu;cims.nyu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3de2334a314a7a72721f1f74a6cb4cee-Abstract.html", "aff_unique_index": "0;1;2", "aff_unique_norm": "Princeton University;Massachusetts Institute of Technology;New York University", "aff_unique_dep": "Department of Mathematics;Department of Mathematics;Department of Mathematics", "aff_unique_url": "https://www.princeton.edu;https://web.mit.edu;https://www.nyu.edu", "aff_unique_abbr": "Princeton;MIT;NYU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;New York", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Theoretical Comparisons of Positive-Unlabeled Learning against Positive-Negative Learning", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7017", "id": "7017", "author_site": "Gang Niu, Marthinus Christoffel du Plessis, Tomoya Sakai, Yao Ma, Masashi Sugiyama", "author": "Gang Niu; Marthinus Christoffel du Plessis; Tomoya Sakai; Yao Ma; Masashi Sugiyama", "abstract": "In PU learning, a binary classifier is trained from positive (P) and unlabeled (U) data without negative (N) data. Although N data is missing, PU learning sometimes outperforms PN learning (i.e., ordinary supervised learning). Hitherto, neither theoretical nor experimental analysis has been given to explain this phenomenon. In this paper, we theoretically compare PU (and NU) learning against PN learning based on the upper bounds on estimation errors. We find simple conditions under which PU and NU learning are likely to outperform PN learning, and we prove that, in terms of the upper bounds, either PU or NU learning (depending on the class-prior probability and the sizes of P and N data) given infinite U data will improve on PN learning. Our theoretical findings agree well with the experimental results on artificial and benchmark data even when the experimental setup does not match the theoretical assumptions exactly.", "bibtex": "@inproceedings{NIPS2016_be3159ad,\n author = {Niu, Gang and du Plessis, Marthinus Christoffel and Sakai, Tomoya and Ma, Yao and Sugiyama, Masashi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. 
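For readers of the PU abstract above: the standard unbiased PU risk estimator that this line of analysis studies rewrites the negative-class risk using unlabeled data. A hedged sketch (ours), with `pi` the positive class prior and a logistic loss:

```python
import numpy as np

def pu_risk(g, Xp, Xu, pi, loss=lambda z: np.log1p(np.exp(-z))):
    """Unbiased PU risk: pi * R_p^+(g) + R_u^-(g) - pi * R_p^-(g)."""
    rp_pos = loss(g(Xp)).mean()     # positives scored as positives
    rp_neg = loss(-g(Xp)).mean()    # positives scored as negatives
    ru_neg = loss(-g(Xu)).mean()    # unlabeled scored as negatives
    return pi * rp_pos + ru_neg - pi * rp_neg
```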
Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Theoretical Comparisons of Positive-Unlabeled Learning against Positive-Negative Learning},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/be3159ad04564bfb90db9e32851ebf9c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/be3159ad04564bfb90db9e32851ebf9c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/be3159ad04564bfb90db9e32851ebf9c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/be3159ad04564bfb90db9e32851ebf9c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/be3159ad04564bfb90db9e32851ebf9c-Reviews.html", "metareview": "", "pdf_size": 637290, "gs_citation": 152, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9878328936341302233&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "The University of Tokyo, Japan; The University of Tokyo, Japan; The University of Tokyo, Japan; Boston University, USA; The University of Tokyo, Japan + RIKEN, Japan", "aff_domain": "ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp", "email": "ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp;ms.k.u-tokyo.ac.jp", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/be3159ad04564bfb90db9e32851ebf9c-Abstract.html", "aff_unique_index": "0;0;0;1;0+2", "aff_unique_norm": "University of Tokyo;Boston University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.bu.edu;https://www.riken.jp", "aff_unique_abbr": "UTokyo;BU;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0+0", "aff_country_unique": "Japan;United States" }, { "title": "Threshold Bandits, With and Without Censored Feedback", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/8510", "id": "8510", "author_site": "Jacob D Abernethy, Kareem Amin, Ruihao Zhu", "author": "Jacob D. Abernethy; Kareem Amin; Ruihao Zhu", "abstract": "We consider the \\emph{Threshold Bandit} setting, a variant of the classical multi-armed bandit problem in which the reward on each round depends on a piece of side information known as a \\emph{threshold value}. The learner selects one of $K$ actions (arms), this action generates a random sample from a fixed distribution, and the action then receives a unit payoff in the event that this sample exceeds the threshold value. We consider two versions of this problem, the \\emph{uncensored} and \\emph{censored} case, that determine whether the sample is always observed or only when the threshold is not met. Using new tools to understand the popular UCB algorithm, we show that the uncensored case is essentially no more difficult than the classical multi-armed bandit setting. Finally we show that the censored case exhibits more challenges, but we give guarantees in the event that the sequence of threshold values is generated optimistically.", "bibtex": "@inproceedings{NIPS2016_0bf727e9,\n author = {Abernethy, Jacob D and Amin, Kareem and Zhu, Ruihao},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
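A toy simulation (ours, not the paper's analysis) of the uncensored threshold-bandit setting above: each round draws a fresh threshold, the chosen arm's sample earns a unit payoff if it clears the threshold, and plain UCB1 runs on those binary payoffs:

```python
import numpy as np

def ucb_threshold_bandit(means, T=5000, rng=None):
    rng = np.random.default_rng(0) if rng is None else rng
    K = len(means)
    n, s = np.zeros(K), np.zeros(K)             # pull counts, payoff sums
    for t in range(1, T + 1):
        c = rng.random()                        # this round's threshold value
        if t <= K:
            k = t - 1                           # pull each arm once to initialize
        else:
            k = int(np.argmax(s / n + np.sqrt(2 * np.log(t) / n)))
        x = rng.normal(means[k], 1.0)           # sample from the chosen arm
        r = float(x > c)                        # unit payoff iff threshold exceeded
        n[k] += 1
        s[k] += r
    return s.sum()                              # total payoff collected
```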
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Threshold Bandits, With and Without Censored Feedback},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0bf727e907c5fc9d5356f11e4c45d613-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0bf727e907c5fc9d5356f11e4c45d613-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0bf727e907c5fc9d5356f11e4c45d613-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0bf727e907c5fc9d5356f11e4c45d613-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0bf727e907c5fc9d5356f11e4c45d613-Reviews.html", "metareview": "", "pdf_size": 260098, "gs_citation": 40, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4291512131342904793&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 6, "aff": "Department of Computer Science, University of Michigan; Department of Computer Science, University of Michigan; AeroAstro&CSAIL, MIT", "aff_domain": "umich.edu;umich.edu;mit.edu", "email": "umich.edu;umich.edu;mit.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0bf727e907c5fc9d5356f11e4c45d613-Abstract.html", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Michigan;Massachusetts Institute of Technology", "aff_unique_dep": "Department of Computer Science;AeroAstro and Computer Science and Artificial Intelligence Laboratory", "aff_unique_url": "https://www.umich.edu;https://web.mit.edu", "aff_unique_abbr": "UM;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Threshold Learning for Optimal Decision Making", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6923", "id": "6923", "author": "Nathan F Lepora", "abstract": "Decision making under uncertainty is commonly modelled as a process of competitive stochastic evidence accumulation to threshold (the drift-diffusion model). However, it is unknown how animals learn these decision thresholds. We examine threshold learning by constructing a reward function that averages over many trials to Wald's cost function that defines decision optimality. These rewards are highly stochastic and hence challenging to optimize, which we address in two ways: first, a simple two-factor reward-modulated learning rule derived from Williams' REINFORCE method for neural networks; and second, Bayesian optimization of the reward function with a Gaussian process. Bayesian optimization converges in fewer trials than REINFORCE but is slower computationally with greater variance. The REINFORCE method is also a better model of acquisition behaviour in animals and a similar learning rule has been proposed for modelling basal ganglia function.", "bibtex": "@inproceedings{NIPS2016_96c5c28b,\n author = {Lepora, Nathan F},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
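The threshold-learning recipe above (drift-diffusion to a bound, then a two-factor REINFORCE-style update of the bound) can be sketched in a few lines; all constants below are our own arbitrary choices, not the paper's settings:

```python
import numpy as np

def ddm_trial(drift, theta, dt=0.01, rng=None):
    """Diffuse until |x| hits theta; return (correct?, decision time)."""
    rng = np.random.default_rng() if rng is None else rng
    x = t = 0.0
    while abs(x) < theta:
        x += drift * dt + np.sqrt(dt) * rng.normal()
        t += dt
    return x > 0, t

rng = np.random.default_rng(0)
theta, lr, cost, baseline = 0.5, 0.05, 0.1, 0.0
for _ in range(2000):
    eps = rng.normal(0.0, 0.05)                          # explore the threshold
    correct, t = ddm_trial(0.5, max(theta + eps, 0.05), rng=rng)
    reward = float(correct) - cost * t                   # Wald-style trade-off
    theta += lr * (reward - baseline) * eps              # two-factor REINFORCE update
    baseline += 0.05 * (reward - baseline)               # running reward baseline
```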
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Threshold Learning for Optimal Decision Making},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/96c5c28becf18e71190460a9955aa4d8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/96c5c28becf18e71190460a9955aa4d8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/96c5c28becf18e71190460a9955aa4d8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/96c5c28becf18e71190460a9955aa4d8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/96c5c28becf18e71190460a9955aa4d8-Reviews.html", "metareview": "", "pdf_size": 705068, "gs_citation": 16, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=14771864396797870979&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Department of Engineering Mathematics, University of Bristol, UK", "aff_domain": "bristol.ac.uk", "email": "bristol.ac.uk", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/96c5c28becf18e71190460a9955aa4d8-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "University of Bristol", "aff_unique_dep": "Department of Engineering Mathematics", "aff_unique_url": "https://www.bristol.ac.uk", "aff_unique_abbr": "UoB", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "title": "Tight Complexity Bounds for Optimizing Composite Objectives", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7264", "id": "7264", "author_site": "Blake Woodworth, Nati Srebro", "author": "Blake E Woodworth; Nati Srebro", "abstract": "We provide tight upper and lower bounds on the complexity of minimizing the average of m convex functions using gradient and prox oracles of the component functions. We show a significant gap between the complexity of deterministic vs randomized optimization. For smooth functions, we show that accelerated gradient descent (AGD) and an accelerated variant of SVRG are optimal in the deterministic and randomized settings respectively, and that a gradient oracle is sufficient for the optimal rate. For non-smooth functions, having access to prox oracles reduces the complexity and we present optimal methods based on smoothing that improve over methods using just gradient accesses.", "bibtex": "@inproceedings{NIPS2016_645098b0,\n author = {Woodworth, Blake E and Srebro, Nati},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
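Since the abstract above singles out accelerated gradient descent as optimal in the deterministic smooth setting, here is the standard Nesterov iteration for reference (textbook form, not specific to the paper):

```python
def agd(grad, x0, L, T=200):
    """Nesterov's accelerated gradient descent for an L-smooth convex objective."""
    x = y = x0
    for t in range(1, T + 1):
        x_next = y - grad(y) / L                         # gradient step at lookahead
        y = x_next + (t - 1) / (t + 2) * (x_next - x)    # momentum extrapolation
        x = x_next
    return x

# e.g. for f(x) = 0.5*||Ax - b||^2: grad = lambda x: A.T @ (A @ x - b), L = ||A||_2^2
```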
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tight Complexity Bounds for Optimizing Composite Objectives},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/645098b086d2f9e1e0e939c27f9f2d6f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/645098b086d2f9e1e0e939c27f9f2d6f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/645098b086d2f9e1e0e939c27f9f2d6f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/645098b086d2f9e1e0e939c27f9f2d6f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/645098b086d2f9e1e0e939c27f9f2d6f-Reviews.html", "metareview": "", "pdf_size": 417258, "gs_citation": 240, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11416938228466256313&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Toyota Technological Institute at Chicago; Toyota Technological Institute at Chicago", "aff_domain": "ttic.edu;ttic.edu", "email": "ttic.edu;ttic.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/645098b086d2f9e1e0e939c27f9f2d6f-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Toyota Technological Institute at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.tti-chicago.org", "aff_unique_abbr": "TTI Chicago", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Total Variation Classes Beyond 1d: Minimax Rates, and the Limitations of Linear Smoothers", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6898", "id": "6898", "author_site": "Veeranjaneyulu Sadhanala, Yu-Xiang Wang, Ryan Tibshirani", "author": "Veeranjaneyulu Sadhanala; Yu-Xiang Wang; Ryan J Tibshirani", "abstract": "We consider the problem of estimating a function defined over $n$ locations on a $d$-dimensional grid (having all side lengths equal to $n^{1/d}$). When the function is constrained to have discrete total variation bounded by $C_n$, we derive the minimax optimal (squared) $\\ell_2$ estimation error rate, parametrized by $n, C_n$. Total variation denoising, also known as the fused lasso, is seen to be rate optimal. Several simpler estimators exist, such as Laplacian smoothing and Laplacian eigenmaps. A natural question is: can these simpler estimators perform just as well? We prove that these estimators, and more broadly all estimators given by linear transformations of the input data, are suboptimal over the class of functions with bounded variation. This extends fundamental findings of Donoho and Johnstone (1998) on 1-dimensional total variation spaces to higher dimensions. The implication is that the computationally simpler methods cannot be used for such sophisticated denoising tasks, without sacrificing statistical accuracy. We also derive minimax rates for discrete Sobolev spaces over $d$-dimensional grids, which are, in some sense, smaller than the total variation function spaces. Indeed, these are small enough spaces that linear estimators can be optimal---and a few well-known ones are, such as Laplacian smoothing and Laplacian eigenmaps, as we show. 
Lastly, we investigate the adaptivity of the total variation denoiser to these smaller Sobolev function spaces.", "bibtex": "@inproceedings{NIPS2016_17ed8abe,\n author = {Sadhanala, Veeranjaneyulu and Wang, Yu-Xiang and Tibshirani, Ryan J},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Total Variation Classes Beyond 1d: Minimax Rates, and the Limitations of Linear Smoothers},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/17ed8abedc255908be746d245e50263a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/17ed8abedc255908be746d245e50263a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/17ed8abedc255908be746d245e50263a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/17ed8abedc255908be746d245e50263a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/17ed8abedc255908be746d245e50263a-Reviews.html", "metareview": "", "pdf_size": 715792, "gs_citation": 92, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6037852238564630616&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Machine Learning Department, Carnegie Mellon University; Machine Learning Department, Carnegie Mellon University; Department of Statistics, Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cs.cmu.edu;stat.cmu.edu", "email": "cs.cmu.edu;cs.cmu.edu;stat.cmu.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/17ed8abedc255908be746d245e50263a-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "Machine Learning Department", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Toward Deeper Understanding of Neural Networks: The Power of Initialization and a Dual View on Expressivity", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7402", "id": "7402", "author_site": "Amit Daniely, Roy Frostig, Yoram Singer", "author": "Amit Daniely; Roy Frostig; Yoram Singer", "abstract": "We develop a general duality between neural networks and compositional kernel Hilbert spaces. We introduce the notion of a computation skeleton, an acyclic graph that succinctly describes both a family of neural networks and a kernel space. Random neural networks are generated from a skeleton through node replication followed by sampling from a normal distribution to assign weights. The kernel space consists of functions that arise by compositions, averaging, and non-linear transformations governed by the skeleton's graph topology and activation functions. We prove that random networks induce representations which approximate the kernel space. In particular, it follows that random weight initialization often yields a favorable starting point for optimization despite the worst-case intractability of training neural networks.", "bibtex": "@inproceedings{NIPS2016_abea47ba,\n author = {Daniely, Amit and Frostig, Roy and Singer, Yoram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
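The Laplacian-smoothing baseline named in the total-variation abstract above amounts to a single sparse linear solve on the grid; a small sketch under our own conventions:

```python
import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as spla

def grid_laplacian(m):
    """Graph Laplacian L = D^T D of the m x m grid, via finite differences."""
    D1 = sp.diags([np.ones(m - 1), -np.ones(m - 1)], [0, 1], shape=(m - 1, m))
    I = sp.identity(m)
    Dx, Dy = sp.kron(I, D1), sp.kron(D1, I)      # differences along both axes
    return Dx.T @ Dx + Dy.T @ Dy

def laplacian_smooth(y_img, lam=5.0):
    """Linear smoother: solve (I + lam * L) x = y on the grid."""
    m = y_img.shape[0]
    A = sp.identity(m * m) + lam * grid_laplacian(m)
    return spla.spsolve(A.tocsc(), y_img.ravel()).reshape(m, m)
```

The paper's point is that every such linear map of the data, however tuned, is minimax-suboptimal over bounded-variation classes, unlike TV denoising.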
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Toward Deeper Understanding of Neural Networks: The Power of Initialization and a Dual View on Expressivity},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/abea47ba24142ed16b7d8fbf2c740e0d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/abea47ba24142ed16b7d8fbf2c740e0d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/abea47ba24142ed16b7d8fbf2c740e0d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/abea47ba24142ed16b7d8fbf2c740e0d-Reviews.html", "metareview": "", "pdf_size": 368816, "gs_citation": 409, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18164411814602923193&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Google Brain; Google Brain + Stanford University; Google Brain", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/abea47ba24142ed16b7d8fbf2c740e0d-Abstract.html", "aff_unique_index": "0;0+1;0", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google Brain;", "aff_unique_url": "https://brain.google.com;https://www.stanford.edu", "aff_unique_abbr": "Google Brain;Stanford", "aff_campus_unique_index": "0;0+1;0", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0+0;0", "aff_country_unique": "United States" }, { "title": "Towards Conceptual Compression", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7110", "id": "7110", "author_site": "Karol Gregor, Frederic Besse, Danilo Jimenez Rezende, Ivo Danihelka, Daan Wierstra", "author": "Karol Gregor; Frederic Besse; Danilo Jimenez Rezende; Ivo Danihelka; Daan Wierstra", "abstract": "We introduce convolutional DRAW, a homogeneous deep generative model achieving state-of-the-art performance in latent variable image modeling. The algorithm naturally stratifies information into higher and lower level details, creating abstract features and as such addressing one of the fundamentally desired properties of representation learning. Furthermore, the hierarchical ordering of its latents creates the opportunity to selectively store global information about an image, yielding a high quality 'conceptual compression' framework.", "bibtex": "@inproceedings{NIPS2016_4abe17a1,\n author = {Gregor, Karol and Besse, Frederic and Jimenez Rezende, Danilo and Danihelka, Ivo and Wierstra, Daan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
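Returning to the network/kernel duality abstract above (Daniely, Frostig, Singer): its simplest instance can be checked numerically, since random ReLU features approximate the degree-1 arc-cosine kernel. This check is a standard fact we supply for illustration, not the paper's general skeleton construction:

```python
import numpy as np

rng = np.random.default_rng(0)
d, m = 8, 100_000
x, y = rng.normal(size=d), rng.normal(size=d)
x, y = x / np.linalg.norm(x), y / np.linalg.norm(y)      # unit vectors

W = rng.normal(size=(m, d))                              # random first layer
empirical = np.mean(np.maximum(W @ x, 0) * np.maximum(W @ y, 0))

theta = np.arccos(np.clip(x @ y, -1.0, 1.0))
exact = (np.sin(theta) + (np.pi - theta) * np.cos(theta)) / (2 * np.pi)
print(empirical, exact)                                  # agree to ~3 decimals
```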
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Conceptual Compression},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/4abe17a1c80cbdd2aa241b70840879de-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/4abe17a1c80cbdd2aa241b70840879de-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/4abe17a1c80cbdd2aa241b70840879de-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/4abe17a1c80cbdd2aa241b70840879de-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/4abe17a1c80cbdd2aa241b70840879de-Reviews.html", "metareview": "", "pdf_size": 2121427, "gs_citation": 298, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13694779565607551693&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/4abe17a1c80cbdd2aa241b70840879de-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Towards Unifying Hamiltonian Monte Carlo and Slice Sampling", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7232", "id": "7232", "author_site": "Yizhe Zhang, Xiangyu Wang, Changyou Chen, Ricardo Henao, Kai Fan, Lawrence Carin", "author": "Yizhe Zhang; Xiangyu Wang; Changyou Chen; Ricardo Henao; Kai Fan; Lawrence Carin", "abstract": "We unify slice sampling and Hamiltonian Monte Carlo (HMC) sampling, demonstrating their connection via the Hamilton-Jacobi equation from Hamiltonian mechanics. This insight enables extension of HMC and slice sampling to a broader family of samplers, called Monomial Gamma Samplers (MGS). We provide a theoretical analysis of the mixing performance of such samplers, proving that in the limit of a single parameter, the MGS draws decorrelated samples from the desired target distribution. We further show that as this parameter tends toward this limit, performance gains are achieved at a cost of increasing numerical difficulty and some practical convergence issues. Our theoretical results are validated with synthetic data and real-world applications.", "bibtex": "@inproceedings{NIPS2016_3cef96dc,\n author = {Zhang, Yizhe and Wang, Xiangyu and Chen, Changyou and Henao, Ricardo and Fan, Kai and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
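For orientation on the unification above: the familiar leapfrog HMC transition is the special case that the Monomial Gamma family generalizes. A standard textbook transcription of ours, for a one-dimensional target:

```python
import numpy as np

def hmc_step(x, logp, grad, eps=0.1, L=20, rng=None):
    """One leapfrog HMC transition with a Metropolis accept/reject."""
    rng = np.random.default_rng() if rng is None else rng
    p0 = rng.normal()
    x1, p1 = x, p0 + 0.5 * eps * grad(x)        # initial half step for momentum
    for i in range(L):
        x1 = x1 + eps * p1
        p1 = p1 + (eps if i < L - 1 else 0.5 * eps) * grad(x1)
    H0 = -logp(x) + 0.5 * p0 ** 2               # kinetic energy p^2/2: the HMC case
    H1 = -logp(x1) + 0.5 * p1 ** 2
    return x1 if np.log(rng.random()) < H0 - H1 else x

# e.g. standard normal target: logp = lambda x: -0.5 * x**2, grad = lambda x: -x
```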
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Towards Unifying Hamiltonian Monte Carlo and Slice Sampling},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3cef96dcc9b8035d23f69e30bb19218a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/3cef96dcc9b8035d23f69e30bb19218a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/3cef96dcc9b8035d23f69e30bb19218a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/3cef96dcc9b8035d23f69e30bb19218a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/3cef96dcc9b8035d23f69e30bb19218a-Reviews.html", "metareview": "", "pdf_size": 1188618, "gs_citation": 24, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=864527749656212145&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Duke University; Duke University; Duke University; Duke University; Duke University; Duke University", "aff_domain": "duke.edu;duke.edu;duke.edu;duke.edu;duke.edu;duke.edu", "email": "duke.edu;duke.edu;duke.edu;duke.edu;duke.edu;duke.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/3cef96dcc9b8035d23f69e30bb19218a-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Duke University", "aff_unique_dep": "", "aff_unique_url": "https://www.duke.edu", "aff_unique_abbr": "Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Tracking the Best Expert in Non-stationary Stochastic Environments", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7001", "id": "7001", "author_site": "Chen-Yu Wei, Yi-Te Hong, Chi-Jen Lu", "author": "Chen-Yu Wei; Yi-Te Hong; Chi-Jen Lu", "abstract": "We study the dynamic regret of the multi-armed bandit and experts problems in non-stationary stochastic environments. We introduce a new parameter $\\Lambda$, which measures the total statistical variance of the loss distributions over $T$ rounds of the process, and study how this amount affects the regret. We investigate the interaction between $\\Lambda$ and $\\Gamma$, which counts the number of times the distributions change, as well as $\\Lambda$ and $V$, which measures how far the distributions deviate over time. One striking result we find is that even when $\\Gamma$, $V$, and $\\Lambda$ are all restricted to constant, the regret lower bound in the bandit setting still grows with $T$. The other highlight is that in the full-information setting, a constant regret becomes achievable with constant $\\Gamma$ and $\\Lambda$, as it can be made independent of $T$, while with constant $V$ and $\\Lambda$, the regret still has a $T^{1/3}$ dependency. We not only propose algorithms with upper bound guarantee, but prove their matching lower bounds as well.", "bibtex": "@inproceedings{NIPS2016_405e2890,\n author = {Wei, Chen-Yu and Hong, Yi-Te and Lu, Chi-Jen},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tracking the Best Expert in Non-stationary Stochastic Environments},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/405e28906322882c5be9b4b27f4c35fd-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/405e28906322882c5be9b4b27f4c35fd-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/405e28906322882c5be9b4b27f4c35fd-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/405e28906322882c5be9b4b27f4c35fd-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/405e28906322882c5be9b4b27f4c35fd-Reviews.html", "metareview": "", "pdf_size": 274461, "gs_citation": 75, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=8094081763202004840&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Institute of Information Science, Academia Sinica, Taiwan; Institute of Information Science, Academia Sinica, Taiwan; Institute of Information Science, Academia Sinica, Taiwan", "aff_domain": "iis.sinica.edu.tw;iis.sinica.edu.tw;iis.sinica.edu.tw", "email": "iis.sinica.edu.tw;iis.sinica.edu.tw;iis.sinica.edu.tw", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/405e28906322882c5be9b4b27f4c35fd-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Academia Sinica", "aff_unique_dep": "Institute of Information Science", "aff_unique_url": "https://www.sinica.edu.tw", "aff_unique_abbr": "AS", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Taiwan" }, { "title": "Tractable Operations for Arithmetic Circuits of Probabilistic Models", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7223", "id": "7223", "author_site": "Yujia Shen, Arthur Choi, Adnan Darwiche", "author": "Yujia Shen; Arthur Choi; Adnan Darwiche", "abstract": "We consider tractable representations of probability distributions and the polytime operations they support. In particular, we consider a recently proposed arithmetic circuit representation, the Probabilistic Sentential Decision Diagram (PSDD). We show that PSDDs support a polytime multiplication operator, while they do not support a polytime operator for summing out variables. A polytime multiplication operator makes PSDDs suitable for a broader class of applications compared to arithmetic circuits, which do not in general support multiplication. As one example, we show that PSDD multiplication leads to a very simple but effective compilation algorithm for probabilistic graphical models: represent each model factor as a PSDD, and then multiply them.", "bibtex": "@inproceedings{NIPS2016_5a7f963e,\n author = {Shen, Yujia and Choi, Arthur and Darwiche, Adnan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
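The compilation recipe at the end of the abstract above ("represent each factor, then multiply") is easy to state on plain table factors; our toy sketch below shows the product operation itself, whereas the paper's contribution is performing it in polytime on PSDD circuits rather than on exponential-size tables:

```python
import itertools

def factor_product(f, g):
    """f, g are (vars, table): table maps value tuples (in vars order) to floats."""
    vf, tf = f
    vg, tg = g
    vs = sorted(set(vf) | set(vg))                 # union of the two scopes
    out = {}
    for vals in itertools.product([0, 1], repeat=len(vs)):
        a = dict(zip(vs, vals))
        out[vals] = tf[tuple(a[v] for v in vf)] * tg[tuple(a[v] for v in vg)]
    return vs, out

# e.g. factor_product((["A","B"], tab1), (["B","C"], tab2)) has scope ["A","B","C"]
```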
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tractable Operations for Arithmetic Circuits of Probabilistic Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/5a7f963e5e0504740c3a6b10bb6d4fa5-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/5a7f963e5e0504740c3a6b10bb6d4fa5-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/5a7f963e5e0504740c3a6b10bb6d4fa5-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/5a7f963e5e0504740c3a6b10bb6d4fa5-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/5a7f963e5e0504740c3a6b10bb6d4fa5-Reviews.html", "metareview": "", "pdf_size": 598220, "gs_citation": 85, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9296857513471439368&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 6, "aff": "Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095; Computer Science Department, University of California, Los Angeles, CA 90095", "aff_domain": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "email": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/5a7f963e5e0504740c3a6b10bb6d4fa5-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Training and Evaluating Multimodal Word Embeddings with Large-scale Web Annotated Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7148", "id": "7148", "author_site": "Junhua Mao, Jiajing Xu, Kevin Jing, Alan Yuille", "author": "Junhua Mao; Jiajing Xu; Kevin Jing; Alan L. Yuille", "abstract": "In this paper, we focus on training and evaluating effective word embeddings with both text and visual information. More specifically, we introduce a large-scale dataset with 300 million sentences describing over 40 million images crawled and downloaded from publicly available Pins (i.e. an image with sentence descriptions uploaded by users) on Pinterest. This dataset is more than 200 times larger than MS COCO, the standard large-scale image dataset with sentence descriptions. In addition, we construct an evaluation dataset to directly assess the effectiveness of word embeddings in terms of finding semantically similar or related words and phrases. The word/phrase pairs in this evaluation dataset are collected from the click data with millions of users in an image search system, thus contain rich semantic relationships. Based on these datasets, we propose and compare several Recurrent Neural Networks (RNNs) based multimodal (text and image) models. Experiments show that our model benefits from incorporating the visual information into the word embeddings, and a weight sharing strategy is crucial for learning such multimodal embeddings. 
The project page is: http://www.stat.ucla.edu/~junhua.mao/multimodal_embedding.html (the datasets introduced in this work will be gradually released on the project page).", "bibtex": "@inproceedings{NIPS2016_c24cd76e,\n author = {Mao, Junhua and Xu, Jiajing and Jing, Kevin and Yuille, Alan L},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Training and Evaluating Multimodal Word Embeddings with Large-scale Web Annotated Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c24cd76e1ce41366a4bbe8a49b02a028-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c24cd76e1ce41366a4bbe8a49b02a028-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c24cd76e1ce41366a4bbe8a49b02a028-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c24cd76e1ce41366a4bbe8a49b02a028-Reviews.html", "metareview": "", "pdf_size": 1643201, "gs_citation": 64, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17322767449414954101&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "University of California, Los Angeles; Pinterest Inc.; Pinterest Inc.; University of California, Los Angeles+Johns Hopkins University", "aff_domain": "ucla.edu;pinterest.com;pinterest.com;gmail.com", "email": "ucla.edu;pinterest.com;pinterest.com;gmail.com", "github": "", "project": "http://www.stat.ucla.edu/~junhua.mao/multimodal_embedding.html", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c24cd76e1ce41366a4bbe8a49b02a028-Abstract.html", "aff_unique_index": "0;1;1;0+2", "aff_unique_norm": "University of California, Los Angeles;Pinterest;Johns Hopkins University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.pinterest.com;https://www.jhu.edu", "aff_unique_abbr": "UCLA;Pinterest;JHU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Tree-Structured Reinforcement Learning for Sequential Object Localization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6965", "id": "6965", "author_site": "Zequn Jie, Xiaodan Liang, Jiashi Feng, Xiaojie Jin, Wen Lu, Shuicheng Yan", "author": "Zequn Jie; Xiaodan Liang; Jiashi Feng; Xiaojie Jin; Wen Lu; Shuicheng Yan", "abstract": "Existing object proposal algorithms usually search for possible object regions over multiple locations and scales \\emph{separately}, which ignores the interdependency among different objects and deviates from the human perception procedure. To incorporate global interdependency between objects into object localization, we propose an effective Tree-structured Reinforcement Learning (Tree-RL) approach to sequentially search for objects by fully exploiting both the current observation and historical search paths. The Tree-RL approach learns multiple searching policies through maximizing the long-term reward that reflects localization accuracies over all the objects. Starting with taking the entire image as a proposal, the Tree-RL approach allows the agent to sequentially discover multiple objects via a tree-structured traversing scheme. 
Allowing multiple near-optimal policies, Tree-RL offers more diversity in search paths and is able to find multiple objects with a single feed-forward pass. Therefore, Tree-RL can better cover objects at various scales, which is quite appealing in the context of object proposal. Experiments on PASCAL VOC 2007 and 2012 validate the effectiveness of Tree-RL, which achieves recall comparable to current object proposal algorithms using far fewer candidate windows.", "bibtex": "@inproceedings{NIPS2016_812b4ba2,\n author = {Jie, Zequn and Liang, Xiaodan and Feng, Jiashi and Jin, Xiaojie and Lu, Wen and Yan, Shuicheng},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Tree-Structured Reinforcement Learning for Sequential Object Localization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/812b4ba287f5ee0bc9d43bbf5bbe87fb-Reviews.html", "metareview": "", "pdf_size": 1958051, "gs_citation": 161, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5748905449681154528&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "National University of Singapore; Carnegie Mellon University; National University of Singapore; National University of Singapore; National University of Singapore; National University of Singapore", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/812b4ba287f5ee0bc9d43bbf5bbe87fb-Abstract.html", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "National University of Singapore;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.cmu.edu", "aff_unique_abbr": "NUS;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "Singapore;United States" }, { "title": "Truncated Variance Reduction: A Unified Approach to Bayesian Optimization and Level-Set Estimation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7320", "id": "7320", "author_site": "Ilija Bogunovic, Jonathan Scarlett, Andreas Krause, Volkan Cevher", "author": "Ilija Bogunovic; Jonathan Scarlett; Andreas Krause; Volkan Cevher", "abstract": "We present a new algorithm, truncated variance reduction (TruVaR), that treats Bayesian optimization (BO) and level-set estimation (LSE) with Gaussian processes in a unified fashion. The algorithm greedily shrinks a sum of truncated variances within a set of potential maximizers (BO) or unclassified points (LSE), which is updated based on confidence bounds. TruVaR is effective in several important settings that are typically non-trivial to incorporate into myopic algorithms, including pointwise costs and heteroscedastic noise. We provide a general theoretical guarantee for TruVaR covering these aspects, and use it to recover and strengthen existing results on BO and LSE. 
Moreover, we provide a new result for a setting where one can select from a number of noise levels having associated costs. We demonstrate the effectiveness of the algorithm on both synthetic and real-world data sets.", "bibtex": "@inproceedings{NIPS2016_ce78d1da,\n author = {Bogunovic, Ilija and Scarlett, Jonathan and Krause, Andreas and Cevher, Volkan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Truncated Variance Reduction: A Unified Approach to Bayesian Optimization and Level-Set Estimation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ce78d1da254c0843eb23951ae077ff5f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ce78d1da254c0843eb23951ae077ff5f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ce78d1da254c0843eb23951ae077ff5f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ce78d1da254c0843eb23951ae077ff5f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ce78d1da254c0843eb23951ae077ff5f-Reviews.html", "metareview": "", "pdf_size": 1018726, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18276635671845108484&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 14, "aff": "Laboratory for Information and Inference Systems (LIONS), EPFL; Laboratory for Information and Inference Systems (LIONS), EPFL; Learning and Adaptive Systems Group, ETH Z\u00fcrich; Laboratory for Information and Inference Systems (LIONS), EPFL", "aff_domain": "epfl.ch;epfl.ch;ethz.ch;epfl.ch", "email": "epfl.ch;epfl.ch;ethz.ch;epfl.ch", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ce78d1da254c0843eb23951ae077ff5f-Abstract.html", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "EPFL;ETH Zurich", "aff_unique_dep": "Laboratory for Information and Inference Systems (LIONS);Learning and Adaptive Systems Group", "aff_unique_url": "https://www.epfl.ch;https://www.ethz.ch", "aff_unique_abbr": "EPFL;ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Understanding Probabilistic Sparse Gaussian Process Approximations", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7065", "id": "7065", "author_site": "Matthias Bauer, Mark van der Wilk, Carl Edward Rasmussen", "author": "Matthias Bauer; Mark van der Wilk; Carl Edward Rasmussen", "abstract": "Good sparse approximations are essential for practical inference in Gaussian Processes as the computational cost of exact methods is prohibitive for large datasets. The Fully Independent Training Conditional (FITC) and the Variational Free Energy (VFE) approximations are two recent popular methods. Despite superficial similarities, these approximations have surprisingly different theoretical properties and behave differently in practice. We thoroughly investigate the two methods for regression both analytically and through illustrative examples, and draw conclusions to guide practical application.", "bibtex": "@inproceedings{NIPS2016_7250eb93,\n author = {Bauer, Matthias and van der Wilk, Mark and Rasmussen, Carl Edward},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
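The greedy rule in the TruVaR abstract above can be written down abstractly; a hedged skeleton (ours) that leaves the Gaussian-process variance bookkeeping to any standard GP library:

```python
import numpy as np

def truncated_var_sum(variances, eta):
    """Sum of posterior variances truncated below at eta^2 over the target set."""
    return np.maximum(variances, eta ** 2).sum()

def pick_next(var_now, var_if_observed, eta, costs=None):
    """var_if_observed[j]: variances over the target set after observing point j."""
    costs = np.ones(len(var_if_observed)) if costs is None else np.asarray(costs)
    before = truncated_var_sum(var_now, eta)
    gains = np.array([before - truncated_var_sum(v, eta) for v in var_if_observed])
    return int(np.argmax(gains / costs))           # largest shrinkage per unit cost
```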
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding Probabilistic Sparse Gaussian Process Approximations},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7250eb93b3c18cc9daa29cf58af7a004-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7250eb93b3c18cc9daa29cf58af7a004-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7250eb93b3c18cc9daa29cf58af7a004-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7250eb93b3c18cc9daa29cf58af7a004-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7250eb93b3c18cc9daa29cf58af7a004-Reviews.html", "metareview": "", "pdf_size": 564916, "gs_citation": 340, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16764934864250724174&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "Department of Engineering, University of Cambridge, Cambridge, UK + Max Planck Institute for Intelligent Systems, T\u00fcbingen, Germany; Department of Engineering, University of Cambridge, Cambridge, UK; Department of Engineering, University of Cambridge, Cambridge, UK", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "email": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7250eb93b3c18cc9daa29cf58af7a004-Abstract.html", "aff_unique_index": "0+1;0;0", "aff_unique_norm": "University of Cambridge;Max Planck Institute for Intelligent Systems", "aff_unique_dep": "Department of Engineering;", "aff_unique_url": "https://www.cam.ac.uk;https://www.mpi-is.mpg.de", "aff_unique_abbr": "Cambridge;MPI-IS", "aff_campus_unique_index": "0+1;0;0", "aff_campus_unique": "Cambridge;T\u00fcbingen", "aff_country_unique_index": "0+1;0;0", "aff_country_unique": "United Kingdom;Germany" }, { "title": "Understanding the Effective Receptive Field in Deep Convolutional Neural Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7398", "id": "7398", "author_site": "Wenjie Luo, Yujia Li, Raquel Urtasun, Richard Zemel", "author": "Wenjie Luo; Yujia Li; Raquel Urtasun; Richard Zemel", "abstract": "We study characteristics of receptive fields of units in deep convolutional networks. The receptive field size is a crucial issue in many visual tasks, as the output must respond to large enough areas in the image to capture information about large objects. We introduce the notion of an effective receptive field size, and show that it both has a Gaussian distribution and only occupies a fraction of the full theoretical receptive field size. We analyze the effective receptive field in several architecture designs, and the effect of sub-sampling, skip connections, dropout and nonlinear activations on it. This leads to suggestions for ways to address its tendency to be too small.", "bibtex": "@inproceedings{NIPS2016_c8067ad1,\n author = {Luo, Wenjie and Li, Yujia and Urtasun, Raquel and Zemel, Richard},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
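The central claim of the effective-receptive-field entry above (the ERF is near-Gaussian and occupies only a fraction of the theoretical receptive field) can be reproduced numerically in one dimension: the gradient of a centre output unit with respect to the input equals the repeated convolution of the layer kernels. A small numpy sketch under assumed uniform 3-tap kernels:

```python
import numpy as np

# Push a delta at the centre output back through n stacked 1-D convolutions
# with uniform 3-tap kernels: the result is the gradient of the centre output
# w.r.t. the input, i.e. the (effective) receptive-field profile.
k = np.ones(3) / 3.0
n_layers = 20

g = np.array([1.0])
for _ in range(n_layers):
    g = np.convolve(g, k)          # support grows by 2 taps per layer

theoretical = n_layers * (len(k) - 1) + 1          # 41 taps in total
x = np.arange(len(g)) - len(g) // 2
std = np.sqrt((g * x ** 2).sum() / g.sum())        # near-Gaussian profile
print(f"theoretical RF: {theoretical} taps")
print(f"effective RF (~2 std of the profile): {2 * std:.1f} taps")
```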
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Understanding the Effective Receptive Field in Deep Convolutional Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c8067ad1937f728f51288b3eb986afaa-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c8067ad1937f728f51288b3eb986afaa-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c8067ad1937f728f51288b3eb986afaa-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c8067ad1937f728f51288b3eb986afaa-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c8067ad1937f728f51288b3eb986afaa-Reviews.html", "metareview": "", "pdf_size": 615339, "gs_citation": 2531, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12122802369550112103&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, University of Toronto; Department of Computer Science, University of Toronto; Department of Computer Science, University of Toronto; Department of Computer Science, University of Toronto", "aff_domain": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "email": "cs.toronto.edu;cs.toronto.edu;cs.toronto.edu;cs.toronto.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c8067ad1937f728f51288b3eb986afaa-Abstract.html", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Toronto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "title": "Unified Methods for Exploiting Piecewise Linear Structure in Convex Optimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7207", "id": "7207", "author_site": "Tyler Johnson, Carlos Guestrin", "author": "Tyler B Johnson; Carlos Guestrin", "abstract": "We develop methods for rapidly identifying important components of a convex optimization problem for the purpose of achieving fast convergence times. By considering a novel problem formulation\u2014the minimization of a sum of piecewise functions\u2014we describe a principled and general mechanism for exploiting piecewise linear structure in convex optimization. This result leads to a theoretically justified working set algorithm and a novel screening test, which generalize and improve upon many prior results on exploiting structure in convex optimization. In empirical comparisons, we study the scalability of our methods. We find that screening scales surprisingly poorly with the size of the problem, while our working set algorithm convincingly outperforms alternative approaches.", "bibtex": "@inproceedings{NIPS2016_cb2c2041,\n author = {Johnson, Tyler B and Guestrin, Carlos},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
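A toy version of the working-set idea from the entry above, applied to one piecewise-linear-structured problem, the lasso: optimise over a small set of coordinates, then admit the worst violators of the KKT optimality conditions. Problem sizes, tolerances, and names are our assumptions, not the paper's algorithm verbatim:

```python
import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((100, 200))
x_true = np.zeros(200); x_true[:5] = 3.0
b = A @ x_true + 0.1 * rng.standard_normal(100)
lam = 0.5 * np.abs(A.T @ b).max()
col_sq = (A ** 2).sum(0)

x, work = np.zeros(200), set()
for outer in range(10):
    r = b - A @ x
    viol = np.abs(A.T @ r) - lam           # KKT violation per coordinate
    viol[list(work)] = -np.inf
    if viol.max() <= 1e-6:
        break                              # every condition holds: done
    work.update(np.argsort(viol)[-5:].tolist())  # admit the worst offenders
    for _ in range(50):                    # solve the working-set subproblem
        for j in work:                     # ...by coordinate descent
            r += A[:, j] * x[j]
            z = A[:, j] @ r
            x[j] = np.sign(z) * max(abs(z) - lam, 0.0) / col_sq[j]
            r -= A[:, j] * x[j]
print("recovered support:", np.flatnonzero(np.abs(x) > 1e-8))
```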
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unified Methods for Exploiting Piecewise Linear Structure in Convex Optimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cb2c2041d9763d84d7d655e81178f444-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cb2c2041d9763d84d7d655e81178f444-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cb2c2041d9763d84d7d655e81178f444-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cb2c2041d9763d84d7d655e81178f444-Reviews.html", "metareview": "", "pdf_size": 2235219, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16365192723785896940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "University of Washington, Seattle; University of Washington, Seattle", "aff_domain": "washington.edu;cs.washington.edu", "email": "washington.edu;cs.washington.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cb2c2041d9763d84d7d655e81178f444-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Unifying Count-Based Exploration and Intrinsic Motivation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7299", "id": "7299", "author_site": "Marc Bellemare, Sriram Srinivasan, Georg Ostrovski, Tom Schaul, David Saxton, Remi Munos", "author": "Marc Bellemare; Sriram Srinivasan; Georg Ostrovski; Tom Schaul; David Saxton; Remi Munos", "abstract": "We consider an agent's uncertainty about its environment and the problem of generalizing this uncertainty across states. Specifically, we focus on the problem of exploration in non-tabular reinforcement learning. Drawing inspiration from the intrinsic motivation literature, we use density models to measure uncertainty, and propose a novel algorithm for deriving a pseudo-count from an arbitrary density model. This technique enables us to generalize count-based exploration algorithms to the non-tabular case. We apply our ideas to Atari 2600 games, providing sensible pseudo-counts from raw pixels. We transform these pseudo-counts into exploration bonuses and obtain significantly improved exploration in a number of hard games, including the infamously difficult Montezuma's Revenge.", "bibtex": "@inproceedings{NIPS2016_afda3322,\n author = {Bellemare, Marc and Srinivasan, Sriram and Ostrovski, Georg and Schaul, Tom and Saxton, David and Munos, Remi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
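The pseudo-count construction described in the entry above is itself one line: query the density model's probability of x before and after a single update on x. A numpy sanity check with a Laplace-smoothed categorical model (our choice of density model for illustration), where the derived pseudo-count tracks the visit count up to the one phantom visit the smoothing prior pretends to have seen:

```python
import numpy as np

def pseudo_count(rho, rho2):
    # rho:  model probability of x before an update on x
    # rho2: "recoding probability" of x after that update
    return rho * (1.0 - rho2) / (rho2 - rho)

counts = np.zeros(4)
for x in [0, 0, 1, 0, 2, 0]:          # a short stream of observed states
    counts[x] += 1

def model_prob(c, x):                 # Laplace-smoothed density model
    return (c[x] + 1.0) / (c.sum() + len(c))

x = 0
rho = model_prob(counts, x)
after = counts.copy(); after[x] += 1
rho2 = model_prob(after, x)
print("true count:", int(counts[x]))                       # 4
print("pseudo-count:", round(pseudo_count(rho, rho2), 2))  # 5.0 = 4 + prior
# An exploration bonus such as beta / sqrt(pseudo_count) is then added
# to the reward.
```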
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unifying Count-Based Exploration and Intrinsic Motivation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/afda332245e2af431fb7b672a68b659d-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/afda332245e2af431fb7b672a68b659d-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/afda332245e2af431fb7b672a68b659d-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/afda332245e2af431fb7b672a68b659d-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/afda332245e2af431fb7b672a68b659d-Reviews.html", "metareview": "", "pdf_size": 726091, "gs_citation": 1892, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7667515176664990362&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/afda332245e2af431fb7b672a68b659d-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Universal Correspondence Network", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7416", "id": "7416", "author_site": "Christopher B Choy, Manmohan Chandraker, JunYoung Gwak, Silvio Savarese", "author": "Christopher B Choy; JunYoung Gwak; Silvio Savarese; Manmohan Chandraker", "abstract": "We present a deep learning framework for accurate visual correspondences and demonstrate its effectiveness for both geometric and semantic matching, spanning across rigid motions to intra-class shape or appearance variations. In contrast to previous CNN-based approaches that optimize a surrogate patch similarity objective, we use deep metric learning to directly learn a feature space that preserves either geometric or semantic similarity. Our fully convolutional architecture, along with a novel correspondence contrastive loss allows faster training by effective reuse of computations, accurate gradient computation through the use of thousands of examples per image pair and faster testing with $O(n)$ feedforward passes for n keypoints, instead of $O(n^2)$ for typical patch similarity methods. We propose a convolutional spatial transformer to mimic patch normalization in traditional features like SIFT, which is shown to dramatically boost accuracy for semantic correspondences across intra-class shape variations. Extensive experiments on KITTI, PASCAL and CUB-2011 datasets demonstrate the significant advantages of our features over prior works that use either hand-constructed or learned features.", "bibtex": "@inproceedings{NIPS2016_b495ce63,\n author = {Choy, Christopher B and Gwak, JunYoung and Savarese, Silvio and Chandraker, Manmohan},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
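The correspondence contrastive loss named in the entry above operates on pairs of pixel coordinates in dense feature maps rather than on pre-cut patches. The feature maps below are random stand-ins for network output; shapes, the margin value, and names are ours:

```python
import numpy as np

def correspondence_contrastive(f1, f2, coords1, coords2, labels, m=1.0):
    a = f1[coords1[:, 0], coords1[:, 1]]      # (N, D) features in image 1
    b = f2[coords2[:, 0], coords2[:, 1]]      # (N, D) features in image 2
    d = np.linalg.norm(a - b, axis=1)
    pos = labels * d ** 2                     # matching pairs: pull together
    neg = (1 - labels) * np.maximum(0.0, m - d) ** 2  # others: push past m
    return 0.5 * (pos + neg).mean()

rng = np.random.default_rng(0)
f1 = rng.standard_normal((32, 32, 8))
f2 = f1 + 0.05 * rng.standard_normal((32, 32, 8))   # near-identical pair
coords = rng.integers(0, 32, size=(100, 2))
wrong = rng.integers(0, 32, size=(100, 2))
print("positive-pair loss:",
      correspondence_contrastive(f1, f2, coords, coords, np.ones(100)))
# Well-separated negatives incur ~0 loss, as the hinge intends:
print("negative-pair loss:",
      correspondence_contrastive(f1, f2, coords, wrong, np.zeros(100)))
```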
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Universal Correspondence Network},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/b495ce63ede0f4efc9eec62cb947c162-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/b495ce63ede0f4efc9eec62cb947c162-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/b495ce63ede0f4efc9eec62cb947c162-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/b495ce63ede0f4efc9eec62cb947c162-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/b495ce63ede0f4efc9eec62cb947c162-Reviews.html", "metareview": "", "pdf_size": 7321008, "gs_citation": 474, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=1872346918987935814&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Stanford University; Stanford University; Stanford University; NEC Laboratories America, Inc.", "aff_domain": "ai.stanford.edu;ai.stanford.edu;stanford.edu;nec-labs.com", "email": "ai.stanford.edu;ai.stanford.edu;stanford.edu;nec-labs.com", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/b495ce63ede0f4efc9eec62cb947c162-Abstract.html", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;NEC Laboratories America", "aff_unique_dep": ";", "aff_unique_url": "https://www.stanford.edu;https://www.nec-labs.com", "aff_unique_abbr": "Stanford;NEC Labs America", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "title": "Unsupervised Domain Adaptation with Residual Transfer Networks", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7059", "id": "7059", "author_site": "Mingsheng Long, Han Zhu, Jianmin Wang, Michael Jordan", "author": "Mingsheng Long; Han Zhu; Jianmin Wang; Michael I Jordan", "abstract": "The recent success of deep neural networks relies on massive amounts of labeled data. For a target task where labeled data is unavailable, domain adaptation can transfer a learner from a different source domain. In this paper, we propose a new approach to domain adaptation in deep networks that can jointly learn adaptive classifiers and transferable features from labeled data in the source domain and unlabeled data in the target domain. We relax a shared-classifier assumption made by previous methods and assume that the source classifier and target classifier differ by a residual function. We enable classifier adaptation by plugging several layers into the deep network to explicitly learn the residual function with reference to the target classifier. We fuse features of multiple layers with a tensor product and embed them into reproducing kernel Hilbert spaces to match distributions for feature adaptation. The adaptation can be achieved in most feed-forward models by extending them with new residual layers and loss functions, which can be trained efficiently via back-propagation. Empirical evidence shows that the new approach outperforms state-of-the-art methods on standard domain adaptation benchmarks.", "bibtex": "@inproceedings{NIPS2016_ac627ab1,\n author = {Long, Mingsheng and Zhu, Han and Wang, Jianmin and Jordan, Michael I},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R.
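A sketch of the distribution-matching ingredient of the residual-transfer entry above: a kernel two-sample statistic (MMD) in an RKHS comparing source and target feature samples. The residual classifier layers and tensor-product fusion are not reproduced; the bandwidth and data are illustrative (a median-distance heuristic is common in practice):

```python
import numpy as np

def rbf_gram(A, B, gamma):
    d = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-gamma * d)

def mmd2(Xs, Xt, gamma=0.03):   # bandwidth: roughly 1/median sq. distance
    m, n = len(Xs), len(Xt)
    return (rbf_gram(Xs, Xs, gamma).sum() / m ** 2
            + rbf_gram(Xt, Xt, gamma).sum() / n ** 2
            - 2 * rbf_gram(Xs, Xt, gamma).sum() / (m * n))

rng = np.random.default_rng(0)
src      = rng.standard_normal((200, 16))        # "source" deep features
tgt_far  = rng.standard_normal((200, 16)) + 1.5  # shifted target domain
tgt_near = rng.standard_normal((200, 16)) + 0.05 # nearly aligned target
print(f"MMD^2, shifted target: {mmd2(src, tgt_far):.4f}")
print(f"MMD^2, aligned target: {mmd2(src, tgt_near):.4f}")
# Training would minimise task loss + lambda * mmd2(feat_s, feat_t).
```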
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Domain Adaptation with Residual Transfer Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ac627ab1ccbdb62ec96e702f07f6425b-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ac627ab1ccbdb62ec96e702f07f6425b-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ac627ab1ccbdb62ec96e702f07f6425b-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ac627ab1ccbdb62ec96e702f07f6425b-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ac627ab1ccbdb62ec96e702f07f6425b-Reviews.html", "metareview": "", "pdf_size": 528410, "gs_citation": 1833, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=12070836061117770706&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "KLiss, MOE; TNList; School of Software, Tsinghua University, China; University of California, Berkeley, Berkeley, USA", "aff_domain": "tsinghua.edu.cn;gmail.com;tsinghua.edu.cn;berkeley.edu", "email": "tsinghua.edu.cn;gmail.com;tsinghua.edu.cn;berkeley.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ac627ab1ccbdb62ec96e702f07f6425b-Abstract.html", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ministry of Education;TNLIST;Tsinghua University;University of California, Berkeley", "aff_unique_dep": ";;School of Software;", "aff_unique_url": ";;https://www.tsinghua.edu.cn;https://www.berkeley.edu", "aff_unique_abbr": "MOE;;THU;UC Berkeley", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;2;3", "aff_country_unique": "Unknown;;China;United States" }, { "title": "Unsupervised Feature Extraction by Time-Contrastive Learning and Nonlinear ICA", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7221", "id": "7221", "author_site": "Aapo Hyvarinen, Hiroshi Morioka", "author": "Aapo Hyvarinen; Hiroshi Morioka", "abstract": "Nonlinear independent component analysis (ICA) provides an appealing framework for unsupervised feature learning, but the models proposed so far are not identifiable. Here, we first propose a new intuitive principle of unsupervised deep learning from time series which uses the nonstationary structure of the data. Our learning principle, time-contrastive learning (TCL), finds a representation which allows optimal discrimination of time segments (windows). Surprisingly, we show how TCL can be related to a nonlinear ICA model, when ICA is redefined to include temporal nonstationarities. In particular, we show that TCL combined with linear ICA estimates the nonlinear ICA model up to point-wise transformations of the sources, and this solution is unique --- thus providing the first identifiability result for nonlinear ICA which is rigorous, constructive, as well as very general.", "bibtex": "@inproceedings{NIPS2016_d305281f,\n author = {Hyvarinen, Aapo and Morioka, Hiroshi},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Feature Extraction by Time-Contrastive Learning and Nonlinear ICA},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d305281faf947ca7acade9ad5c8c818c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d305281faf947ca7acade9ad5c8c818c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d305281faf947ca7acade9ad5c8c818c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d305281faf947ca7acade9ad5c8c818c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d305281faf947ca7acade9ad5c8c818c-Reviews.html", "metareview": "", "pdf_size": 1498209, "gs_citation": 490, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=363753999925360434&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science and HIIT, University of Helsinki, Finland+Gatsby Computational Neuroscience Unit, University College London, UK; Department of Computer Science and HIIT, University of Helsinki, Finland", "aff_domain": "; ", "email": "; ", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d305281faf947ca7acade9ad5c8c818c-Abstract.html", "aff_unique_index": "0+1;0", "aff_unique_norm": "University of Helsinki;University College London", "aff_unique_dep": "Department of Computer Science and HIIT;Gatsby Computational Neuroscience Unit", "aff_unique_url": "https://www.helsinki.fi;https://www.ucl.ac.uk", "aff_unique_abbr": "UH;UCL", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0+1;0", "aff_country_unique": "Finland;United Kingdom" }, { "title": "Unsupervised Learning for Physical Interaction through Video Prediction", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6922", "id": "6922", "author_site": "Chelsea Finn, Ian Goodfellow, Sergey Levine", "author": "Chelsea Finn; Ian Goodfellow; Sergey Levine", "abstract": "A core challenge for an agent learning to interact with the world is to predict how its actions affect objects in its environment. Many existing methods for learning the dynamics of physical interactions require labeled object information. However, to scale real-world interaction learning to a variety of scenes and objects, acquiring labeled data becomes increasingly impractical. To learn about physical object motion without labels, we develop an action-conditioned video prediction model that explicitly models pixel motion, by predicting a distribution over pixel motion from previous frames. Because our model explicitly predicts motion, it is partially invariant to object appearance, enabling it to generalize to previously unseen objects. To explore video prediction for real-world interactive agents, we also introduce a dataset of 59,000 robot interactions involving pushing motions, including a test set with novel objects. In this dataset, accurate prediction of videos conditioned on the robot's future actions amounts to learning a \"visual imagination\" of different futures based on different courses of action. 
Our experiments show that our proposed method produces more accurate video predictions both quantitatively and qualitatively when compared to prior methods.", "bibtex": "@inproceedings{NIPS2016_d9d4f495,\n author = {Finn, Chelsea and Goodfellow, Ian and Levine, Sergey},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning for Physical Interaction through Video Prediction},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/d9d4f495e875a2e075a1a4a6e1b9770f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/d9d4f495e875a2e075a1a4a6e1b9770f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/d9d4f495e875a2e075a1a4a6e1b9770f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/d9d4f495e875a2e075a1a4a6e1b9770f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/d9d4f495e875a2e075a1a4a6e1b9770f-Reviews.html", "metareview": "", "pdf_size": 1393887, "gs_citation": 1308, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5380767711147691375&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "UC Berkeley; OpenAI; Google Brain + UC Berkeley", "aff_domain": "eecs.berkeley.edu;openai.com;google.com", "email": "eecs.berkeley.edu;openai.com;google.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/d9d4f495e875a2e075a1a4a6e1b9770f-Abstract.html", "aff_unique_index": "0;1;2+0", "aff_unique_norm": "University of California, Berkeley;OpenAI;Google", "aff_unique_dep": ";;Google Brain", "aff_unique_url": "https://www.berkeley.edu;https://openai.com;https://brain.google.com", "aff_unique_abbr": "UC Berkeley;OpenAI;Google Brain", "aff_campus_unique_index": "0;2+0", "aff_campus_unique": "Berkeley;;Mountain View", "aff_country_unique_index": "0;0;0+0", "aff_country_unique": "United States" }, { "title": "Unsupervised Learning from Noisy Networks with Applications to Hi-C Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7231", "id": "7231", "author_site": "Bo Wang, Junjie Zhu, Armin Pourshafeie, Oana Ursu, Serafim Batzoglou, Anshul Kundaje", "author": "Bo Wang; Junjie Zhu; Armin Pourshafeie; Oana Ursu; Serafim Batzoglou; Anshul Kundaje", "abstract": "Complex networks play an important role in a plethora of disciplines in natural sciences. Cleaning up noisy observed networks poses an important challenge in network analysis. Existing methods utilize labeled data to alleviate the effect of noise in the network. However, labeled data is usually expensive to collect, while unlabeled data can be gathered cheaply. In this paper, we propose an optimization framework to mine useful structures from noisy networks in an unsupervised manner. The key feature of our optimization framework is its ability to utilize local structures as well as global patterns in the network. We extend our method to incorporate multi-resolution networks in order to add further resistance to high levels of noise. We also generalize our framework to utilize partial labels to enhance the performance. We specifically focus our method on multi-resolution Hi-C data by recovering clusters of genomic regions that co-localize in 3D space.
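For the video-prediction entry above, the core of predicting "a distribution over pixel motion" is that the next frame is the previous frame resampled under per-pixel displacement kernels. A numpy sketch with a hand-made kernel standing in for the network output; shapes and names are ours:

```python
import numpy as np

def predict_next(frame, kernels):
    """frame: (H, W); kernels: (H, W, k, k), each (k, k) slice a distribution
    over displacements. Returns the motion-resampled next frame."""
    H, W = frame.shape
    k = kernels.shape[-1]; r = k // 2
    padded = np.pad(frame, r, mode="edge")
    out = np.zeros_like(frame)
    for dy in range(k):
        for dx in range(k):
            out += kernels[:, :, dy, dx] * padded[dy:dy + H, dx:dx + W]
    return out

H = W = 8
frame = np.zeros((H, W)); frame[3, 3] = 1.0          # one bright "object"
kernels = np.zeros((H, W, 3, 3))
kernels[:, :, 1, 0] = 1.0   # each pixel copies its left neighbour:
                            # content shifts one pixel to the right
print(np.argwhere(predict_next(frame, kernels) > 0.5))   # -> [[3 4]]
```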
Additionally, we use Capture-C-generated partial labels to further denoise the Hi-C network. We empirically demonstrate the effectiveness of our framework in denoising the network and improving community detection results.", "bibtex": "@inproceedings{NIPS2016_e4873aa9,\n author = {Wang, Bo and Zhu, Junjie and Pourshafeie, Armin and Ursu, Oana and Batzoglou, Serafim and Kundaje, Anshul},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning from Noisy Networks with Applications to Hi-C Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/e4873aa9a05cc5ed839561d121516766-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/e4873aa9a05cc5ed839561d121516766-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/e4873aa9a05cc5ed839561d121516766-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/e4873aa9a05cc5ed839561d121516766-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/e4873aa9a05cc5ed839561d121516766-Reviews.html", "metareview": "", "pdf_size": 3517199, "gs_citation": 7, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16520566785163540629&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 4, "aff": ";;;;;", "aff_domain": ";;;;;", "email": ";;;;;", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/e4873aa9a05cc5ed839561d121516766-Abstract.html" }, { "title": "Unsupervised Learning of 3D Structure from Images", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7229", "id": "7229", "author_site": "Danilo Jimenez Rezende, S. M. Ali Eslami, Shakir Mohamed, Peter Battaglia, Max Jaderberg, Nicolas Heess", "author": "Danilo Jimenez Rezende; S. M. Ali Eslami; Shakir Mohamed; Peter Battaglia; Max Jaderberg; Nicolas Heess", "abstract": "A key goal of computer vision is to recover the underlying 3D structure that gives rise to 2D observations of the world. If endowed with 3D understanding, agents can abstract away from the complexity of the rendering process to form stable, disentangled representations of scene elements. In this paper we learn strong deep generative models of 3D structures, and recover these structures from 2D images via probabilistic inference. We demonstrate high-quality samples and report log-likelihoods on several datasets, including ShapeNet, and establish the first benchmarks in the literature. We also show how these models and their inference networks can be trained jointly, end-to-end, and directly from 2D images without any use of ground-truth 3D labels. This demonstrates for the first time the feasibility of learning to infer 3D representations of the world in a purely unsupervised manner.", "bibtex": "@inproceedings{NIPS2016_1d94108e,\n author = {Jimenez Rezende, Danilo and Eslami, S. M. Ali and Mohamed, Shakir and Battaglia, Peter and Jaderberg, Max and Heess, Nicolas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of 3D Structure from Images},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1d94108e907bb8311d8802b48fd54b4a-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1d94108e907bb8311d8802b48fd54b4a-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1d94108e907bb8311d8802b48fd54b4a-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1d94108e907bb8311d8802b48fd54b4a-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1d94108e907bb8311d8802b48fd54b4a-Reviews.html", "metareview": "", "pdf_size": 2766736, "gs_citation": 466, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10329095673099262978&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 9, "aff": "Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "email": "google.com;google.com;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1d94108e907bb8311d8802b48fd54b4a-Abstract.html", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "title": "Unsupervised Learning of Spoken Language with Visual Context", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7325", "id": "7325", "author_site": "David Harwath, Antonio Torralba, James Glass", "author": "David Harwath; Antonio Torralba; James Glass", "abstract": "Humans learn to speak before they can read or write, so why can\u2019t computers do the same? In this paper, we present a deep neural network model capable of rudimentary spoken language acquisition using untranscribed audio training data, whose only supervision comes in the form of contextually relevant visual images. We describe the collection of our data comprised of over 120,000 spoken audio captions for the Places image dataset and evaluate our model on an image search and annotation task. We also provide some visualizations which suggest that our model is learning to recognize meaningful words within the caption spectrograms.", "bibtex": "@inproceedings{NIPS2016_82b8a343,\n author = {Harwath, David and Torralba, Antonio and Glass, James},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Learning of Spoken Language with Visual Context},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/82b8a3434904411a9fdc43ca87cee70c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/82b8a3434904411a9fdc43ca87cee70c-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/82b8a3434904411a9fdc43ca87cee70c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/82b8a3434904411a9fdc43ca87cee70c-Reviews.html", "metareview": "", "pdf_size": 4898870, "gs_citation": 303, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15916584063200601299&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": ";;", "aff_domain": ";;", "email": ";;", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/82b8a3434904411a9fdc43ca87cee70c-Abstract.html" }, { "title": "Unsupervised Risk Estimation Using Only Conditional Independence Structure", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7039", "id": "7039", "author_site": "Jacob Steinhardt, Percy Liang", "author": "Jacob Steinhardt; Percy Liang", "abstract": "We show how to estimate a model\u2019s test error from unlabeled data, on distributions very different from the training distribution, while assuming only that certain conditional independencies are preserved between train and test. We do not need to assume that the optimal predictor is the same between train and test, or that the true distribution lies in any parametric family. We can also efficiently compute gradients of the estimated error and hence perform unsupervised discriminative learning. Our technical tool is the method of moments, which allows us to exploit conditional independencies in the absence of a fully-specified model. Our framework encompasses a large family of losses including the log and exponential loss, and extends to structured output settings such as conditional random fields.", "bibtex": "@inproceedings{NIPS2016_f2d887e0,\n author = {Steinhardt, Jacob and Liang, Percy S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
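A special case of the unsupervised-risk-estimation entry above that fits in a few lines: with three conditionally independent views, symmetric errors, and balanced +/-1 labels, each classifier's accuracy is identified from pairwise agreement rates alone by the method of moments. The paper's framework is far more general; this numpy check only illustrates the mechanism:

```python
import numpy as np

rng = np.random.default_rng(0)
y = rng.choice([-1, 1], size=200_000)          # hidden ground truth
acc = [0.9, 0.8, 0.7]                          # never used by the estimator
f = np.array([np.where(rng.random(y.size) < a, y, -y) for a in acc])

# Pairwise agreement moments: E[f_i f_j] = c_i c_j with c_i = 2*acc_i - 1.
r12, r13, r23 = (f[0]*f[1]).mean(), (f[0]*f[2]).mean(), (f[1]*f[2]).mean()
c = np.sqrt([r12 * r13 / r23, r12 * r23 / r13, r13 * r23 / r12])
print("estimated accuracies:", np.round((c + 1) / 2, 3))   # ~ [0.9 0.8 0.7]
```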
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Unsupervised Risk Estimation Using Only Conditional Independence Structure},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/f2d887e01a80e813d9080038decbbabb-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/f2d887e01a80e813d9080038decbbabb-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/f2d887e01a80e813d9080038decbbabb-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/f2d887e01a80e813d9080038decbbabb-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/f2d887e01a80e813d9080038decbbabb-Reviews.html", "metareview": "", "pdf_size": 713990, "gs_citation": 39, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=18122552120321259398&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Stanford University; Stanford University", "aff_domain": "cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/f2d887e01a80e813d9080038decbbabb-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "Using Fast Weights to Attend to the Recent Past", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7224", "id": "7224", "author_site": "Jimmy Ba, Geoffrey E Hinton, Volodymyr Mnih, Joel Leibo, Catalin Ionescu", "author": "Jimmy Ba; Geoffrey E. Hinton; Volodymyr Mnih; Joel Z. Leibo; Catalin Ionescu", "abstract": "Until recently, research on artificial neural networks was largely restricted to systems with only two types of variable: Neural activities that represent the current or recent input and weights that learn to capture regularities among inputs, outputs and payoffs. There is no good reason for this restriction. Synapses have dynamics at many different time-scales and this suggests that artificial neural networks might benefit from variables that change slower than activities but much faster than the standard weights. These ``fast weights'' can be used to store temporary memories of the recent past and they provide a neurally plausible way of implementing the type of attention to the past that has recently proven helpful in sequence-to-sequence models. By using fast weights we can avoid the need to store copies of neural activity patterns.", "bibtex": "@inproceedings{NIPS2016_9f44e956,\n author = {Ba, Jimmy and Hinton, Geoffrey E and Mnih, Volodymyr and Leibo, Joel Z and Ionescu, Catalin},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
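The fast-weights mechanism in the entry above is compact enough to sketch directly: a matrix of Hebbian traces decays with lambda and grows with eta, and an inner settling loop attends to it. Constants and dimensions are our illustrative choices around the described recipe:

```python
import numpy as np

rng = np.random.default_rng(0)
D, lam, eta, S = 20, 0.95, 0.5, 3        # size, decay, fast LR, inner steps
W = 0.05 * rng.standard_normal((D, D))   # slow recurrent weights
C = 0.05 * rng.standard_normal((D, D))   # slow input weights

def layer_norm(h, eps=1e-5):
    return (h - h.mean()) / (h.std() + eps)

A, h = np.zeros((D, D)), np.zeros(D)
for x in rng.standard_normal((10, D)):   # a short input sequence
    A = lam * A + eta * np.outer(h, h)   # write a decaying Hebbian trace
    hs = np.tanh(W @ h + C @ x)          # preliminary next state
    for _ in range(S):                   # settle: attend to recent history
        hs = np.tanh(layer_norm(W @ h + C @ x + A @ hs))
    h = hs
print("final hidden norm:", round(float(np.linalg.norm(h)), 3))
```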
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Using Fast Weights to Attend to the Recent Past},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9f44e956e3a2b7b5598c625fcc802c36-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9f44e956e3a2b7b5598c625fcc802c36-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9f44e956e3a2b7b5598c625fcc802c36-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9f44e956e3a2b7b5598c625fcc802c36-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9f44e956e3a2b7b5598c625fcc802c36-Reviews.html", "metareview": "", "pdf_size": 357882, "gs_citation": 317, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=15137024002549952693&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 13, "aff": "University of Toronto; University of Toronto+Google Brain; Google DeepMind; Google DeepMind; Google DeepMind", "aff_domain": "psi.toronto.edu;google.com;google.com;google.com;google.com", "email": "psi.toronto.edu;google.com;google.com;google.com;google.com", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9f44e956e3a2b7b5598c625fcc802c36-Abstract.html", "aff_unique_index": "0;0+1;1;1;1", "aff_unique_norm": "University of Toronto;Google", "aff_unique_dep": ";Google Brain", "aff_unique_url": "https://www.utoronto.ca;https://brain.google.com", "aff_unique_abbr": "U of T;Google Brain", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0+1;2;2;2", "aff_country_unique": "Canada;United States;United Kingdom" }, { "title": "Using Social Dynamics to Make Individual Predictions: Variational Inference with a Stochastic Kinetic Model", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7165", "id": "7165", "author_site": "Zhen Xu, Wen Dong, Sargur N Srihari", "author": "Zhen Xu; Wen Dong; Sargur N Srihari", "abstract": "Social dynamics is concerned primarily with interactions among individuals and the resulting group behaviors, modeling the temporal evolution of social systems via the interactions of individuals within these systems. In particular, the availability of large-scale data from social networks and sensor networks offers an unprecedented opportunity to predict state-changing events at the individual level. Examples of such events include disease transmission, opinion transition in elections, and rumor propagation. Unlike previous research focusing on the collective effects of social systems, this study makes efficient inferences at the individual level. In order to cope with dynamic interactions among a large number of individuals, we introduce the stochastic kinetic model to capture adaptive transition probabilities and propose an efficient variational inference algorithm whose complexity grows linearly, rather than exponentially, with the number of individuals. To validate this method, we have performed epidemic-dynamics experiments on wireless sensor network data collected from more than ten thousand people over three years. The proposed algorithm was used to track disease transmission and predict the probability of infection for each individual.
Our results demonstrate that this method is more efficient than sampling while nonetheless achieving high accuracy.", "bibtex": "@inproceedings{NIPS2016_512c5cad,\n author = {Xu, Zhen and Dong, Wen and Srihari, Sargur N},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Using Social Dynamics to Make Individual Predictions: Variational Inference with a Stochastic Kinetic Model},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/512c5cad6c37edb98ae91c8a76c3a291-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/512c5cad6c37edb98ae91c8a76c3a291-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/512c5cad6c37edb98ae91c8a76c3a291-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/512c5cad6c37edb98ae91c8a76c3a291-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/512c5cad6c37edb98ae91c8a76c3a291-Reviews.html", "metareview": "", "pdf_size": 1575960, "gs_citation": 17, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=10687897168156518479&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 10, "aff": "Department of Computer Science and Engineering, University at Buffalo; Department of Computer Science and Engineering, University at Buffalo; Department of Computer Science and Engineering, University at Buffalo", "aff_domain": "buffalo.edu;buffalo.edu;buffalo.edu", "email": "buffalo.edu;buffalo.edu;buffalo.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/512c5cad6c37edb98ae91c8a76c3a291-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University at Buffalo", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.buffalo.edu", "aff_unique_abbr": "UB", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Buffalo", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "VIME: Variational Information Maximizing Exploration", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7150", "id": "7150", "author_site": "Rein Houthooft, Xi Chen, Peter Chen, Yan Duan, John Schulman, Filip De Turck, Pieter Abbeel", "author": "Rein Houthooft; Xi Chen; Xi Chen; Yan Duan; John Schulman; Filip De Turck; Pieter Abbeel", "abstract": "Scalable and effective exploration remains a key challenge in reinforcement learning (RL). While there are methods with optimality guarantees in the setting of discrete state and action spaces, these methods cannot be applied in high-dimensional deep RL scenarios. As such, most contemporary RL relies on simple heuristics such as epsilon-greedy exploration or adding Gaussian noise to the controls. This paper introduces Variational Information Maximizing Exploration (VIME), an exploration strategy based on maximization of information gain about the agent's belief of environment dynamics. We propose a practical implementation, using variational inference in Bayesian neural networks which efficiently handles continuous state and action spaces. VIME modifies the MDP reward function, and can be applied with several different underlying RL algorithms. 
We demonstrate that VIME achieves significantly better performance compared to heuristic exploration methods across a variety of continuous control tasks and algorithms, including tasks with very sparse rewards.", "bibtex": "@inproceedings{NIPS2016_abd81528,\n author = {Houthooft, Rein and Chen, Xi and Chen, Xi and Duan, Yan and Schulman, John and De Turck, Filip and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {VIME: Variational Information Maximizing Exploration},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/abd815286ba1007abfbb8415b83ae2cf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/abd815286ba1007abfbb8415b83ae2cf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/abd815286ba1007abfbb8415b83ae2cf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/abd815286ba1007abfbb8415b83ae2cf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/abd815286ba1007abfbb8415b83ae2cf-Reviews.html", "metareview": "", "pdf_size": 3647454, "gs_citation": 967, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=4965361873864842159&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 12, "aff": ";;;;;;", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/abd815286ba1007abfbb8415b83ae2cf-Abstract.html" }, { "title": "Value Iteration Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7211", "id": "7211", "author_site": "Aviv Tamar, Sergey Levine, Pieter Abbeel, YI WU, Garrett Thomas", "author": "Aviv Tamar; YI WU; Garrett Thomas; Sergey Levine; Pieter Abbeel", "abstract": "We introduce the value iteration network (VIN): a fully differentiable neural network with a `planning module' embedded within. VINs can learn to plan, and are suitable for predicting outcomes that involve planning-based reasoning, such as policies for reinforcement learning. Key to our approach is a novel differentiable approximation of the value-iteration algorithm, which can be represented as a convolutional neural network, and trained end-to-end using standard backpropagation. We evaluate VIN based policies on discrete and continuous path-planning domains, and on a natural-language based search task. We show that by learning an explicit planning computation, VIN policies generalize better to new, unseen domains.", "bibtex": "@inproceedings{NIPS2016_c21002f4,\n author = {Tamar, Aviv and WU, YI and Thomas, Garrett and Levine, Sergey and Abbeel, Pieter},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
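For the Value Iteration Networks entry above: the differentiable planning module is value iteration written as a local, convolution-style stencil followed by a max over action channels. A numpy sketch on a toy grid world; rewards, sizes, and names are ours:

```python
import numpy as np

H = W = 7
reward = -0.05 * np.ones((H, W)); reward[6, 6] = 1.0  # goal in a corner
reward[3, 1:6] = -1.0                                 # a wall of penalties

gamma, K = 0.95, 40
V = np.zeros((H, W))
pad = lambda M: np.pad(M, 1, constant_values=-1e9)    # forbid leaving grid
for _ in range(K):                     # K unrolled "layers" of the module
    P = pad(V)
    shifts = [P[0:H, 1:W + 1], P[2:H + 2, 1:W + 1],   # value of up, down
              P[1:H + 1, 0:W], P[1:H + 1, 2:W + 2]]   # value of left, right
    Q = np.stack([reward + gamma * s for s in shifts])  # one channel/action
    V = Q.max(axis=0)                  # max-pool over the action channels
print("V(start):", round(float(V[0, 0]), 3))
print("first action (0=up,1=down,2=left,3=right):", int(Q[:, 0, 0].argmax()))
```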
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Value Iteration Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c21002f464c5fc5bee3b98ced83963b8-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c21002f464c5fc5bee3b98ced83963b8-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c21002f464c5fc5bee3b98ced83963b8-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c21002f464c5fc5bee3b98ced83963b8-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c21002f464c5fc5bee3b98ced83963b8-Reviews.html", "metareview": "", "pdf_size": 537787, "gs_citation": 808, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=9903414776362829253&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 19, "aff": "Dept. of Electrical Engineering and Computer Sciences, UC Berkeley; Dept. of Electrical Engineering and Computer Sciences, UC Berkeley; Dept. of Electrical Engineering and Computer Sciences, UC Berkeley; Dept. of Electrical Engineering and Computer Sciences, UC Berkeley; Dept. of Electrical Engineering and Computer Sciences, UC Berkeley", "aff_domain": ";;;;", "email": ";;;;", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c21002f464c5fc5bee3b98ced83963b8-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "Department of Electrical Engineering and Computer Sciences", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variance Reduction in Stochastic Gradient Langevin Dynamics", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7236", "id": "7236", "author_site": "Kumar Avinava Dubey, Sashank J. Reddi, Sinead Williamson, Barnabas Poczos, Alexander Smola, Eric Xing", "author": "Kumar Avinava Dubey; Sashank J. Reddi; Sinead A Williamson; Barnabas Poczos; Alexander J Smola; Eric P Xing", "abstract": "Stochastic gradient-based Monte Carlo methods such as stochastic gradient Langevin dynamics are useful tools for posterior inference on large scale datasets in many machine learning applications. These methods scale to large datasets by using noisy gradients calculated using a mini-batch or subset of the dataset. However, the high variance inherent in these noisy gradients degrades performance and leads to slower mixing. In this paper, we present techniques for reducing variance in stochastic gradient Langevin dynamics, yielding novel stochastic Monte Carlo methods that improve performance by reducing the variance in the stochastic gradient. We show that our proposed method has better theoretical guarantees on convergence rate than stochastic Langevin dynamics. This is complemented by impressive empirical results obtained on a variety of real world datasets, and on four different machine learning tasks (regression, classification, independent component analysis and mixture modeling). These theoretical and empirical contributions combine to make a compelling case for using variance reduction in stochastic Monte Carlo methods.", "bibtex": "@inproceedings{NIPS2016_9b698eb3,\n author = {Dubey, Kumar Avinava and J. 
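One standard way to realize the variance reduction described in the entry above is an SVRG-style correction inside the Langevin update: anchor a full gradient at a snapshot and de-bias each minibatch gradient against it. A numpy sketch on a toy Gaussian posterior, with our choice of constants:

```python
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(2.0, 1.0, size=10_000)  # model: x ~ N(theta, 1), flat prior

def grad_log_lik(theta, xs):   # d/dtheta of sum_i log N(x_i | theta, 1)
    return (xs - theta).sum()

N, B, step = len(data), 100, 1e-4
theta, samples = 0.0, []
for epoch in range(30):
    snap = theta
    g_full = grad_log_lik(snap, data)     # full gradient at the snapshot
    for _ in range(N // B):
        batch = rng.choice(data, B)
        # SVRG-corrected stochastic gradient: unbiased, much lower variance.
        g = (N / B) * (grad_log_lik(theta, batch)
                       - grad_log_lik(snap, batch)) + g_full
        theta += 0.5 * step * g + np.sqrt(step) * rng.standard_normal()
        samples.append(theta)
print("posterior mean est.:",
      round(float(np.mean(samples[len(samples) // 2:])), 3),
      "| sample mean:", round(float(data.mean()), 3))
```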
Reddi, Sashank and Williamson, Sinead A and Poczos, Barnabas and Smola, Alexander J and Xing, Eric P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variance Reduction in Stochastic Gradient Langevin Dynamics},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9b698eb3105bd82528f23d0c92dedfc0-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9b698eb3105bd82528f23d0c92dedfc0-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9b698eb3105bd82528f23d0c92dedfc0-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9b698eb3105bd82528f23d0c92dedfc0-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9b698eb3105bd82528f23d0c92dedfc0-Reviews.html", "metareview": "", "pdf_size": 509551, "gs_citation": 120, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6252182257727316651&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 9, "aff": "Department of Machine Learning, Carnegie-Mellon University; Department of Machine Learning, Carnegie-Mellon University; Department of Machine Learning, Carnegie-Mellon University; Department of Machine Learning, Carnegie-Mellon University; Department of Machine Learning, Carnegie-Mellon University; IROM/Statistics and Data Science, University of Texas at Austin", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;mccombs.utexas.edu", "email": "cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;cs.cmu.edu;mccombs.utexas.edu", "github": "", "project": "", "author_num": 6, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9b698eb3105bd82528f23d0c92dedfc0-Abstract.html", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;University of Texas at Austin", "aff_unique_dep": "Department of Machine Learning;IROM/Statistics and Data Science", "aff_unique_url": "https://www.cmu.edu;https://www.utexas.edu", "aff_unique_abbr": "CMU;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Autoencoder for Deep Learning of Images, Labels and Captions", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7306", "id": "7306", "author_site": "Yunchen Pu, Zhe Gan, Ricardo Henao, Xin Yuan, Chunyuan Li, Andrew Stevens, Lawrence Carin", "author": "Yunchen Pu; Zhe Gan; Ricardo Henao; Xin Yuan; Chunyuan Li; Andrew Stevens; Lawrence Carin", "abstract": "A novel variational autoencoder is developed to model images, as well as associated labels or captions. The Deep Generative Deconvolutional Network (DGDN) is used as a decoder of the latent image features, and a deep Convolutional Neural Network (CNN) is used as an image encoder; the CNN is used to approximate a distribution for the latent DGDN features/code. The latent code is also linked to generative models for labels (Bayesian support vector machine) or captions (recurrent neural network). When predicting a label/caption for a new image at test time, averaging is performed across the distribution of latent codes; this is computationally efficient as a consequence of the learned CNN-based encoder.
Since the framework is capable of modeling the image in the presence/absence of associated labels/captions, a new semi-supervised setting is manifested for CNN learning with images; the framework even allows unsupervised CNN learning, based on images alone.", "bibtex": "@inproceedings{NIPS2016_eb86d510,\n author = {Pu, Yunchen and Gan, Zhe and Henao, Ricardo and Yuan, Xin and Li, Chunyuan and Stevens, Andrew and Carin, Lawrence},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Autoencoder for Deep Learning of Images, Labels and Captions},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/eb86d510361fc23b59f18c1bc9802cc6-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/eb86d510361fc23b59f18c1bc9802cc6-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/eb86d510361fc23b59f18c1bc9802cc6-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/eb86d510361fc23b59f18c1bc9802cc6-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/eb86d510361fc23b59f18c1bc9802cc6-Reviews.html", "metareview": "", "pdf_size": 487883, "gs_citation": 1096, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6882187919491425397&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 12, "aff": "Department of Electrical and Computer Engineering, Duke University\u2020; Department of Electrical and Computer Engineering, Duke University\u2020; Department of Electrical and Computer Engineering, Duke University\u2020; Nokia Bell Labs, Murray Hill\u2021; Department of Electrical and Computer Engineering, Duke University\u2020; Department of Electrical and Computer Engineering, Duke University\u2020; Department of Electrical and Computer Engineering, Duke University\u2020", "aff_domain": "duke.edu;duke.edu;duke.edu;bell-labs.com;duke.edu;duke.edu;duke.edu", "email": "duke.edu;duke.edu;duke.edu;bell-labs.com;duke.edu;duke.edu;duke.edu", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/eb86d510361fc23b59f18c1bc9802cc6-Abstract.html", "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Duke University;Nokia Bell Labs", "aff_unique_dep": "Department of Electrical and Computer Engineering;", "aff_unique_url": "https://www.duke.edu;https://www.nokia.com bell-labs/", "aff_unique_abbr": "Duke;Nokia Bell Labs", "aff_campus_unique_index": "1", "aff_campus_unique": ";Murray Hill", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "title": "Variational Bayes on Monte Carlo Steroids", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7101", "id": "7101", "author_site": "Aditya Grover, Stefano Ermon", "author": "Aditya Grover; Stefano Ermon", "abstract": "Variational approaches are often used to approximate intractable posteriors or normalization constants in hierarchical latent variable models. While often effective in practice, it is known that the approximation error can be arbitrarily large. We propose a new class of bounds on the marginal log-likelihood of directed latent variable models. Our approach relies on random projections to simplify the posterior. In contrast to standard variational methods, our bounds are guaranteed to be tight with high probability. 
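For the variational-autoencoder entry above, the test-time procedure is: encode an image to a distribution over latent codes, draw reparameterised samples, and average the label model over the draws. The encoder and classifier below are random linear stand-ins (not the DGDN or Bayesian SVM), purely to show the averaging step:

```python
import numpy as np

rng = np.random.default_rng(0)
D_img, D_z, n_classes, K = 64, 8, 10, 32

W_mu  = 0.1 * rng.standard_normal((D_z, D_img))    # encoder stub: mean head
W_sig = 0.01 * rng.standard_normal((D_z, D_img))   # encoder stub: scale head
W_cls = rng.standard_normal((n_classes, D_z))      # label-model stub

def softmax(a):
    e = np.exp(a - a.max()); return e / e.sum()

x = rng.standard_normal(D_img)                     # a test "image"
mu, sigma = W_mu @ x, np.exp(W_sig @ x)            # q(z|x) = N(mu, sigma^2)
z = mu + sigma * rng.standard_normal((K, D_z))     # K reparameterised draws
probs = np.mean([softmax(W_cls @ zi) for zi in z], axis=0)  # average preds
print("label:", int(probs.argmax()), "p =", round(float(probs.max()), 3))
```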
We provide a new approach for learning latent variable models based on optimizing our new bounds on the log-likelihood. We demonstrate empirical improvements on benchmark datasets in vision and language for sigmoid belief networks, where a neural network is used to approximate the posterior.", "bibtex": "@inproceedings{NIPS2016_0c9ebb2d,\n author = {Grover, Aditya and Ermon, Stefano},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Bayes on Monte Carlo Steroids},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/0c9ebb2ded806d7ffda75cd0b95eb70c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/0c9ebb2ded806d7ffda75cd0b95eb70c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/0c9ebb2ded806d7ffda75cd0b95eb70c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/0c9ebb2ded806d7ffda75cd0b95eb70c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/0c9ebb2ded806d7ffda75cd0b95eb70c-Reviews.html", "metareview": "", "pdf_size": 1462771, "gs_citation": 4, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2749665019143216494&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 4, "aff": "Department of Computer Science; Department of Computer Science", "aff_domain": "cs.stanford.edu;cs.stanford.edu", "email": "cs.stanford.edu;cs.stanford.edu", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/0c9ebb2ded806d7ffda75cd0b95eb70c-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "", "aff_country_unique": "" }, { "title": "Variational Inference in Mixed Probabilistic Submodular Models", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7298", "id": "7298", "author_site": "Josip Djolonga, Sebastian Tschiatschek, Andreas Krause", "author": "Josip Djolonga; Sebastian Tschiatschek; Andreas Krause", "abstract": "We consider the problem of variational inference in probabilistic models with both log-submodular and log-supermodular higher-order potentials. These models can represent arbitrary distributions over binary variables, and thus generalize the commonly used pairwise Markov random fields and models with log-supermodular potentials only, for which efficient approximate inference algorithms are known. While inference in the considered models is #P-hard in general, we present efficient approximate algorithms exploiting recent advances in the field of discrete optimization. We demonstrate the effectiveness of our approach in a large set of experiments, where our model allows reasoning about preferences over sets of items with complements and substitutes.", "bibtex": "@inproceedings{NIPS2016_9232fe81,\n author = {Djolonga, Josip and Tschiatschek, Sebastian and Krause, Andreas},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Inference in Mixed Probabilistic Submodular Models},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9232fe81225bcaef853ae32870a2b0fe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9232fe81225bcaef853ae32870a2b0fe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9232fe81225bcaef853ae32870a2b0fe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9232fe81225bcaef853ae32870a2b0fe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9232fe81225bcaef853ae32870a2b0fe-Reviews.html", "metareview": "", "pdf_size": 745506, "gs_citation": 23, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=3347482883331214741&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 11, "aff": "Department of Computer Science, ETH Z\u00fcrich; Department of Computer Science, ETH Z\u00fcrich; Department of Computer Science, ETH Z\u00fcrich", "aff_domain": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "email": "inf.ethz.ch;inf.ethz.ch;inf.ethz.ch", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9232fe81225bcaef853ae32870a2b0fe-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "title": "Variational Information Maximization for Feature Selection", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7172", "id": "7172", "author_site": "Shuyang Gao, Greg Ver Steeg, Aram Galstyan", "author": "Shuyang Gao; Greg Ver Steeg; Aram Galstyan", "abstract": "Feature selection is one of the most fundamental problems in machine learning. An extensive body of work on information-theoretic feature selection exists which is based on maximizing mutual information between subsets of features and class labels. Practical methods are forced to rely on approximations due to the difficulty of estimating mutual information. We demonstrate that approximations made by existing methods are based on unrealistic assumptions. We formulate a more flexible and general class of assumptions based on variational distributions and use them to tractably generate lower bounds for mutual information. These bounds define a novel information-theoretic framework for feature selection, which we prove to be optimal under tree graphical models with proper choice of variational distributions. Our experiments demonstrate that the proposed method strongly outperforms existing information-theoretic feature selection approaches.", "bibtex": "@inproceedings{NIPS2016_7f100b7b,\n author = {Gao, Shuyang and Ver Steeg, Greg and Galstyan, Aram},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Variational Information Maximization for Feature Selection},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/7f100b7b36092fb9b06dfb4fac360931-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/7f100b7b36092fb9b06dfb4fac360931-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/7f100b7b36092fb9b06dfb4fac360931-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/7f100b7b36092fb9b06dfb4fac360931-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/7f100b7b36092fb9b06dfb4fac360931-Reviews.html", "metareview": "", "pdf_size": 730989, "gs_citation": 69, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=17183701699119100182&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "University of Southern California, Information Sciences Institute; University of Southern California, Information Sciences Institute; University of Southern California, Information Sciences Institute", "aff_domain": "usc.edu;isi.edu;isi.edu", "email": "usc.edu;isi.edu;isi.edu", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/7f100b7b36092fb9b06dfb4fac360931-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "Information Sciences Institute", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "Verification Based Solution for Structured MAB Problems", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7020", "id": "7020", "author_site": "Zohar Karnin", "author": "Zohar S Karnin", "abstract": "We consider the problem of finding the best arm in a stochastic Multi-armed Bandit (MAB) game and propose a general framework based on verification that applies to multiple well-motivated generalizations of the classic MAB problem. In these generalizations, additional structure is known in advance, causing the task of verifying the optimality of a candidate to be easier than discovering the best arm. Our results are focused on the scenario where the failure probability $\\delta$ must be very low; we essentially show that in this high confidence regime, identifying the best arm is as easy as the task of verification. We demonstrate the effectiveness of our framework by applying it, and improving the state-of-the-art results, in the problems of: Linear bandits, Dueling bandits with the Condorcet assumption, Copeland dueling bandits, Unimodal bandits and Graphical bandits.", "bibtex": "@inproceedings{NIPS2016_65b9eea6,\n author = {Karnin, Zohar S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Verification Based Solution for Structured MAB Problems},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/65b9eea6e1cc6bb9f0cd2a47751a186f-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/65b9eea6e1cc6bb9f0cd2a47751a186f-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/65b9eea6e1cc6bb9f0cd2a47751a186f-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/65b9eea6e1cc6bb9f0cd2a47751a186f-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/65b9eea6e1cc6bb9f0cd2a47751a186f-Reviews.html", "metareview": "", "pdf_size": 491635, "gs_citation": 41, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13597749520408842940&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 3, "aff": "Yahoo Research", "aff_domain": "ymail.com", "email": "ymail.com", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/65b9eea6e1cc6bb9f0cd2a47751a186f-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Yahoo", "aff_unique_dep": "Yahoo Research", "aff_unique_url": "https://research.yahoo.com", "aff_unique_abbr": "Yahoo Research", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "title": "Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7406", "id": "7406", "author_site": "Tianfan Xue, Jiajun Wu, Katherine Bouman, Bill Freeman", "author": "Tianfan Xue; Jiajun Wu; Katherine Bouman; Bill Freeman", "abstract": "We study the problem of synthesizing a number of likely future frames from a single input image. In contrast to traditional methods, which have tackled this problem in a deterministic or non-parametric way, we propose a novel approach which models future frames in a probabilistic manner. Our proposed method is therefore able to synthesize multiple possible next frames using the same model. Solving this challenging problem involves low- and high-level image and motion understanding for successful image synthesis. Here, we propose a novel network structure, namely a Cross Convolutional Network, that encodes images as feature maps and motion information as convolutional kernels to aid in synthesizing future frames. In experiments, our model performs well both on synthetic data, such as 2D shapes and animated game sprites, and on real-world video data. We show that our model can also be applied to tasks such as visual analogy-making, and present analysis of the learned network representations.", "bibtex": "@inproceedings{NIPS2016_03afdbd6,\n author = {Xue, Tianfan and Wu, Jiajun and Bouman, Katherine and Freeman, Bill},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/03afdbd66e7929b125f8597834fa83a4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/03afdbd66e7929b125f8597834fa83a4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/03afdbd66e7929b125f8597834fa83a4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/03afdbd66e7929b125f8597834fa83a4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/03afdbd66e7929b125f8597834fa83a4-Reviews.html", "metareview": "", "pdf_size": 1285739, "gs_citation": 521, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=6484494541166161719&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 17, "aff": "Massachusetts Institute of Technology; Massachusetts Institute of Technology; Massachusetts Institute of Technology; Massachusetts Institute of Technology+Google Research", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu", "email": "mit.edu;mit.edu;mit.edu;mit.edu", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/03afdbd66e7929b125f8597834fa83a4-Abstract.html", "aff_unique_index": "0;0;0;0+1", "aff_unique_norm": "Massachusetts Institute of Technology;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://web.mit.edu;https://research.google", "aff_unique_abbr": "MIT;Google Research", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0+0", "aff_country_unique": "United States" }, { "title": "Visual Question Answering with Question Representation Update (QRU)", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7115", "id": "7115", "author_site": "Ruiyu Li, Jiaya Jia", "author": "Ruiyu Li; Jiaya Jia", "abstract": "Our method aims at reasoning over natural language questions and visual images. Given a natural language question about an image, our model updates the question representation iteratively by selecting image regions relevant to the query and learns to give the correct answer. Our model contains several reasoning layers, exploiting complex visual relations in the visual question answering (VQA) task. The proposed network is end-to-end trainable through back-propagation, where its weights are initialized using a pre-trained convolutional neural network (CNN) and a gated recurrent unit (GRU). Our method is evaluated on the challenging COCO-QA and VQA datasets and yields state-of-the-art performance.", "bibtex": "@inproceedings{NIPS2016_fd69dbe2,\n author = {Li, Ruiyu and Jia, Jiaya},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Visual Question Answering with Question Representation Update (QRU)},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/fd69dbe29f156a7ef876a40a94f65599-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/fd69dbe29f156a7ef876a40a94f65599-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/fd69dbe29f156a7ef876a40a94f65599-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/fd69dbe29f156a7ef876a40a94f65599-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/fd69dbe29f156a7ef876a40a94f65599-Reviews.html", "metareview": "", "pdf_size": 818279, "gs_citation": 105, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=16153635375574201110&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "The Chinese University of Hong Kong; The Chinese University of Hong Kong", "aff_domain": "cse.cuhk.edu.hk;cse.cuhk.edu.hk", "email": "cse.cuhk.edu.hk;cse.cuhk.edu.hk", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/fd69dbe29f156a7ef876a40a94f65599-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "title": "Wasserstein Training of Restricted Boltzmann Machines", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7365", "id": "7365", "author_site": "Gr\u00e9goire Montavon, Klaus-Robert M\u00fcller, Marco Cuturi", "author": "Gr\u00e9goire Montavon; Klaus-Robert M\u00fcller; Marco Cuturi", "abstract": "Boltzmann machines are able to learn highly complex, multimodal, structured and multiscale real-world data distributions. Parameters of the model are usually learned by minimizing the Kullback-Leibler (KL) divergence from training samples to the learned model. We propose in this work a novel approach for Boltzmann machine training which assumes that a meaningful metric between observations is known. This metric between observations can then be used to define the Wasserstein distance between the distribution induced by the Boltzmann machine on the one hand, and that given by the training sample on the other hand. We derive a gradient of that distance with respect to the model parameters. Minimization of this new objective leads to generative models with different statistical properties. We demonstrate their practical potential on data completion and denoising, for which the metric between observations plays a crucial role.", "bibtex": "@inproceedings{NIPS2016_728f206c,\n author = {Montavon, Gr\\'{e}goire and M\\\"{u}ller, Klaus-Robert and Cuturi, Marco},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Wasserstein Training of Restricted Boltzmann Machines},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/728f206c2a01bf572b5940d7d9a8fa4c-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/728f206c2a01bf572b5940d7d9a8fa4c-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/728f206c2a01bf572b5940d7d9a8fa4c-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/728f206c2a01bf572b5940d7d9a8fa4c-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/728f206c2a01bf572b5940d7d9a8fa4c-Reviews.html", "metareview": "", "pdf_size": 1588325, "gs_citation": 150, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2726009032429206815&as_sdt=400005&sciodt=0,14&hl=en", "gs_version_total": 9, "aff": "Technische Universit\u00e4t Berlin; Technische Universit\u00e4t Berlin + Department of Brain and Cognitive Engineering, Korea University; CREST, ENSAE, Universit\u00e9 Paris-Saclay", "aff_domain": "tu-berlin.de;tu-berlin.de;ensae.fr", "email": "tu-berlin.de;tu-berlin.de;ensae.fr", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/728f206c2a01bf572b5940d7d9a8fa4c-Abstract.html", "aff_unique_index": "0;0+1;2", "aff_unique_norm": "Technische Universit\u00e4t Berlin;Korea University;CREST", "aff_unique_dep": ";Department of Brain and Cognitive Engineering;", "aff_unique_url": "https://www.tu-berlin.de;http://www.korea.ac.kr;https://www.crest.fr", "aff_unique_abbr": "TU Berlin;KU;CREST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0+1;2", "aff_country_unique": "Germany;South Korea;France" }, { "title": "Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7412", "id": "7412", "author_site": "Tim Salimans, Diederik Kingma", "author": "Tim Salimans; Diederik P. Kingma", "abstract": "We present weight normalization: a reparameterization of the weight vectors in a neural network that decouples the length of those weight vectors from their direction. By reparameterizing the weights in this way we improve the conditioning of the optimization problem and we speed up convergence of stochastic gradient descent. Our reparameterization is inspired by batch normalization but does not introduce any dependencies between the examples in a minibatch. This means that our method can also be applied successfully to recurrent models such as LSTMs and to noise-sensitive applications such as deep reinforcement learning or generative models, for which batch normalization is less well suited. Although our method is much simpler, it still provides much of the speed-up of full batch normalization. In addition, the computational overhead of our method is lower, permitting more optimization steps to be taken in the same amount of time. We demonstrate the usefulness of our method on applications in supervised image recognition, generative modelling, and deep reinforcement learning.", "bibtex": "@inproceedings{NIPS2016_ed265bc9,\n author = {Salimans, Tim and Kingma, Durk P},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/ed265bc903a5a097f61d3ec064d96d2e-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/ed265bc903a5a097f61d3ec064d96d2e-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/ed265bc903a5a097f61d3ec064d96d2e-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/ed265bc903a5a097f61d3ec064d96d2e-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/ed265bc903a5a097f61d3ec064d96d2e-Reviews.html", "metareview": "", "pdf_size": 293353, "gs_citation": 2495, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=5176697277672103356&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 8, "aff": "OpenAI; OpenAI", "aff_domain": "openai.com;openai.com", "email": "openai.com;openai.com", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/ed265bc903a5a097f61d3ec064d96d2e-Abstract.html", "aff_unique_index": "0;0", "aff_unique_norm": "OpenAI", "aff_unique_dep": "", "aff_unique_url": "https://openai.com", "aff_unique_abbr": "OpenAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "title": "What Makes Objects Similar: A Unified Multi-Metric Learning Approach", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7324", "id": "7324", "author_site": "Han-Jia Ye, De-Chuan Zhan, Xue-Min Si, Yuan Jiang, Zhi-Hua Zhou", "author": "Han-Jia Ye; De-Chuan Zhan; Xue-Min Si; Yuan Jiang; Zhi-Hua Zhou", "abstract": "Linkages are essentially determined by similarity measures that may be derived from multiple perspectives. For example, spatial linkages are usually generated based on localities of heterogeneous data, whereas semantic linkages can come from various properties, such as different physical meanings behind social relations. Many existing metric learning models focus on spatial linkages, but leave the rich semantic factors unconsidered. Similarities based on these models are usually overdetermined on linkages. We propose a Unified Multi-Metric Learning (UM2L) framework to exploit multiple types of metrics. In UM2L, a type of combination operator is introduced for distance characterization from multiple perspectives, which introduces flexibility for representing and utilizing both spatial and semantic linkages. In addition, we propose a uniform solver for UM2L which is guaranteed to converge. Extensive experiments on diverse applications exhibit the superior classification performance and comprehensibility of UM2L. Visualization results also validate its ability to discover physical meanings.", "bibtex": "@inproceedings{NIPS2016_8fecb208,\n author = {Ye, Han-Jia and Zhan, De-Chuan and Si, Xue-Min and Jiang, Yuan and Zhou, Zhi-Hua},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {What Makes Objects Similar: A Unified Multi-Metric Learning Approach},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/8fecb20817b3847419bb3de39a609afe-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/8fecb20817b3847419bb3de39a609afe-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/8fecb20817b3847419bb3de39a609afe-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/8fecb20817b3847419bb3de39a609afe-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/8fecb20817b3847419bb3de39a609afe-Reviews.html", "metareview": "", "pdf_size": 4058043, "gs_citation": 54, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=681336522116570966&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 13, "aff": "National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China; National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, 210023, China", "aff_domain": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "email": "lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn;lamda.nju.edu.cn", "github": "", "project": "", "author_num": 5, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/8fecb20817b3847419bb3de39a609afe-Abstract.html", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "National Key Laboratory for Novel Software Technology", "aff_unique_url": "http://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Nanjing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "title": "Without-Replacement Sampling for Stochastic Gradient Methods", "status": "Oral", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7405", "id": "7405", "author": "Ohad Shamir", "abstract": "Stochastic gradient methods for machine learning and optimization problems are usually analyzed assuming data points are sampled with replacement.", "bibtex": "@inproceedings{NIPS2016_c74d97b0,\n author = {Shamir, Ohad},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Without-Replacement Sampling for Stochastic Gradient Methods},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/c74d97b01eae257e44aa9d5bade97baf-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/c74d97b01eae257e44aa9d5bade97baf-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/c74d97b01eae257e44aa9d5bade97baf-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/c74d97b01eae257e44aa9d5bade97baf-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/c74d97b01eae257e44aa9d5bade97baf-Reviews.html", "metareview": "", "pdf_size": 354128, "gs_citation": 183, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=297443996027068413&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 7, "aff": "Department of Computer Science and Applied Mathematics, Weizmann Institute of Science, Rehovot, Israel", "aff_domain": "weizmann.ac.il", "email": "weizmann.ac.il", "github": "", "project": "", "author_num": 1, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/c74d97b01eae257e44aa9d5bade97baf-Abstract.html", "aff_unique_index": "0", "aff_unique_norm": "Weizmann Institute of Science", "aff_unique_dep": "Department of Computer Science and Applied Mathematics", "aff_unique_url": "https://www.weizmann.ac.il", "aff_unique_abbr": "Weizmann", "aff_campus_unique_index": "0", "aff_campus_unique": "Rehovot", "aff_country_unique_index": "0", "aff_country_unique": "Israel" }, { "title": "Yggdrasil: An Optimized System for Training Deep Decision Trees at Scale", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7012", "id": "7012", "author_site": "Firas Abuzaid, Joseph K Bradley, Feynman Liang, Andrew Feng, Lee Yang, Matei Zaharia, Ameet S Talwalkar", "author": "Firas Abuzaid; Joseph K. Bradley; Feynman T Liang; Andrew Feng; Lee Yang; Matei Zaharia; Ameet S Talwalkar", "abstract": "Deep distributed decision trees and tree ensembles have grown in importance due to the need to model increasingly large datasets. However, PLANET, the standard distributed tree learning algorithm implemented in systems such as XGBoost and Spark MLlib, scales poorly as data dimensionality and tree depths grow. We present Yggdrasil, a new distributed tree learning method that outperforms existing methods by up to 24x. Unlike PLANET, Yggdrasil is based on vertical partitioning of the data (i.e., partitioning by feature), along with a set of optimized data structures to reduce the CPU and communication costs of training. Yggdrasil (1) trains directly on compressed data for compressible features and labels; (2) introduces efficient data structures for training on uncompressed data; and (3) minimizes communication between nodes by using sparse bitvectors. Moreover, while PLANET approximates split points through feature binning, Yggdrasil does not require binning, and we analytically characterize the impact of this approximation. We evaluate Yggdrasil on the MNIST 8M dataset and a high-dimensional dataset at Yahoo; for both, Yggdrasil is faster by up to an order of magnitude.", "bibtex": "@inproceedings{NIPS2016_9fdb62f9,\n author = {Abuzaid, Firas and Bradley, Joseph K and Liang, Feynman T and Feng, Andrew and Yang, Lee and Zaharia, Matei and Talwalkar, Ameet S},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. 
Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {Yggdrasil: An Optimized System for Training Deep Decision Trees at Scale},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/9fdb62f932adf55af2c0e09e55861964-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/9fdb62f932adf55af2c0e09e55861964-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/9fdb62f932adf55af2c0e09e55861964-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/9fdb62f932adf55af2c0e09e55861964-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/9fdb62f932adf55af2c0e09e55861964-Reviews.html", "metareview": "", "pdf_size": 453177, "gs_citation": 31, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7254933952566477961&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "MIT CSAIL; Databricks; University of Cambridge; Yahoo; Yahoo; MIT CSAIL; UCLA", "aff_domain": ";;;;;;", "email": ";;;;;;", "github": "", "project": "", "author_num": 7, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/9fdb62f932adf55af2c0e09e55861964-Abstract.html", "aff_unique_index": "0;1;2;3;3;0;4", "aff_unique_norm": "Massachusetts Institute of Technology;Databricks;University of Cambridge;Yahoo;University of California, Los Angeles", "aff_unique_dep": "Computer Science and Artificial Intelligence Laboratory;;;;", "aff_unique_url": "https://www.csail.mit.edu;https://databricks.com;https://www.cam.ac.uk;https://www.yahoo.com;https://www.ucla.edu", "aff_unique_abbr": "MIT CSAIL;Databricks;Cambridge;Yahoo;UCLA", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Cambridge;;Los Angeles", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "title": "beta-risk: a New Surrogate Risk for Learning from Weakly Labeled Data", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6909", "id": "6909", "author_site": "Valentina Zantedeschi, R\u00e9mi Emonet, Marc Sebban", "author": "Valentina Zantedeschi; R\u00e9mi Emonet; Marc Sebban", "abstract": "During the past few years, the machine learning community has paid attention to developing new methods for learning from weakly labeled data. This field covers different settings like semi-supervised learning, learning with label proportions, multi-instance learning, noise-tolerant learning, etc. This paper presents a generic framework to deal with these weakly labeled scenarios. We introduce the beta-risk as a generalized formulation of the standard empirical risk based on surrogate margin-based loss functions. This risk allows us to express the reliability on the labels and to derive different kinds of learning algorithms. We specifically focus on SVMs and propose a soft margin beta-svm algorithm which behaves better than the state of the art.", "bibtex": "@inproceedings{NIPS2016_1e8c391a,\n author = {Zantedeschi, Valentina and Emonet, R\\'{e}mi and Sebban, Marc},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {beta-risk: a New Surrogate Risk for Learning from Weakly Labeled Data},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/1e8c391abfde9abea82d75a2d60278d4-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/1e8c391abfde9abea82d75a2d60278d4-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/1e8c391abfde9abea82d75a2d60278d4-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/1e8c391abfde9abea82d75a2d60278d4-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/1e8c391abfde9abea82d75a2d60278d4-Reviews.html", "metareview": "", "pdf_size": 269106, "gs_citation": 8, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=2775653624338218343&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 11, "aff": "Univ Lyon, UJM-Saint-Etienne, CNRS, Institut d'Optique Graduate School, Laboratoire Hubert Curien UMR 5516, F-42023, SAINT-ETIENNE, France; Univ Lyon, UJM-Saint-Etienne, CNRS, Institut d'Optique Graduate School, Laboratoire Hubert Curien UMR 5516, F-42023, SAINT-ETIENNE, France; Univ Lyon, UJM-Saint-Etienne, CNRS, Institut d'Optique Graduate School, Laboratoire Hubert Curien UMR 5516, F-42023, SAINT-ETIENNE, France", "aff_domain": "univ-st-etienne.fr;univ-st-etienne.fr;univ-st-etienne.fr", "email": "univ-st-etienne.fr;univ-st-etienne.fr;univ-st-etienne.fr", "github": "", "project": "http://vzantedeschi.com/", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/1e8c391abfde9abea82d75a2d60278d4-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Universite Lyon", "aff_unique_dep": "Institut d'Optique Graduate School", "aff_unique_url": "https://www.univ-lyon.fr", "aff_unique_abbr": "Univ Lyon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Saint-Etienne", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "title": "f-GAN: Training Generative Neural Samplers using Variational Divergence Minimization", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7334", "id": "7334", "author_site": "Sebastian Nowozin, Botond Cseke, Ryota Tomioka", "author": "Sebastian Nowozin; Botond Cseke; Ryota Tomioka", "abstract": "Generative neural networks are probabilistic models that implement sampling using feedforward neural networks: they take a random input vector and produce a sample from a probability distribution defined by the network weights. These models are expressive and allow efficient computation of samples and derivatives, but cannot be used for computing likelihoods or for marginalization. The generative-adversarial training method allows such models to be trained through the use of an auxiliary discriminative neural network. We show that the generative-adversarial approach is a special case of an existing more general variational divergence estimation approach. We show that any $f$-divergence can be used for training generative neural networks. We discuss the benefits of various choices of divergence functions on training complexity and the quality of the obtained generative models.", "bibtex": "@inproceedings{NIPS2016_cedebb6e,\n author = {Nowozin, Sebastian and Cseke, Botond and Tomioka, Ryota},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {f-GAN: Training Generative Neural Samplers using Variational Divergence Minimization},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/cedebb6e872f539bef8c3f919874e9d7-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/cedebb6e872f539bef8c3f919874e9d7-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/cedebb6e872f539bef8c3f919874e9d7-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/cedebb6e872f539bef8c3f919874e9d7-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/cedebb6e872f539bef8c3f919874e9d7-Reviews.html", "metareview": "", "pdf_size": 3419363, "gs_citation": 2103, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=11521929775075838473&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 10, "aff": "Machine Intelligence and Perception Group, Microsoft Research; Machine Intelligence and Perception Group, Microsoft Research; Machine Intelligence and Perception Group, Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "email": "microsoft.com;microsoft.com;microsoft.com", "github": "", "project": "", "author_num": 3, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/cedebb6e872f539bef8c3f919874e9d7-Abstract.html", "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Machine Intelligence and Perception Group", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "title": "k*-Nearest Neighbors: From Global to Local", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/6900", "id": "6900", "author_site": "Oren Anava, Kfir Y. Levy", "author": "Oren Anava; Kfir Levy", "abstract": "The weighted k-nearest neighbors algorithm is one of the most fundamental non-parametric methods in pattern recognition and machine learning. The question of setting the optimal number of neighbors as well as the optimal weights has received much attention throughout the years; nevertheless, this problem seems to have remained unsettled. In this paper we offer a simple approach to locally weighted regression/classification, where we make the bias-variance tradeoff explicit. Our formulation enables us to phrase a notion of optimal weights, and to find these weights, as well as the optimal number of neighbors, efficiently and adaptively for each data point whose value we wish to estimate. The applicability of our approach is demonstrated on several datasets, showing superior performance over standard locally weighted methods.", "bibtex": "@inproceedings{NIPS2016_2c6ae45a,\n author = {Anava, Oren and Levy, Kfir},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {k\ast -Nearest Neighbors: From Global to Local},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/2c6ae45a3e88aee548c0714fad7f8269-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/2c6ae45a3e88aee548c0714fad7f8269-Paper.pdf", "supp": "", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/2c6ae45a3e88aee548c0714fad7f8269-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/2c6ae45a3e88aee548c0714fad7f8269-Reviews.html", "metareview": "", "pdf_size": 543669, "gs_citation": 126, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=7721584304263865737&as_sdt=5,33&sciodt=0,33&hl=en", "gs_version_total": 7, "aff": "The Voleon Group; ETH Zurich", "aff_domain": "voleon.com;inf.ethz.ch", "email": "voleon.com;inf.ethz.ch", "github": "", "project": "", "author_num": 2, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/2c6ae45a3e88aee548c0714fad7f8269-Abstract.html", "aff_unique_index": "0;1", "aff_unique_norm": "Voleon Group;ETH Zurich", "aff_unique_dep": ";", "aff_unique_url": ";https://www.ethz.ch", "aff_unique_abbr": ";ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Switzerland" }, { "title": "\u201cCongruent\u201d and \u201cOpposite\u201d Neurons: Sisters for Multisensory Integration and Segregation", "status": "Poster", "track": "main", "site": "https://nips.cc/virtual/2016/poster/7083", "id": "7083", "author_site": "Wen-Hao Zhang, He Wang, K. Y. Michael Wong, Si Wu", "author": "Wen-Hao Zhang; He Wang; K. Y. Michael Wong; Si Wu", "abstract": "Experiments reveal that in the dorsal medial superior temporal (MSTd) and the ventral intraparietal (VIP) areas, where visual and vestibular cues are integrated to infer heading direction, there are two types of neurons in roughly equal numbers. One is \u201ccongruent\u201d cells, whose preferred heading directions are similar in response to visual and vestibular cues; and the other is \u201copposite\u201d cells, whose preferred heading directions are nearly \u201copposite\u201d (with an offset of 180 degrees) in response to visual vs. vestibular cues. Congruent neurons are known to be responsible for cue integration, but the computational role of opposite neurons remains largely unknown. Here, we propose that opposite neurons may serve to encode the disparity information between cues necessary for multisensory segregation. We build a computational model composed of two reciprocally coupled modules, MSTd and VIP, and each module consists of groups of congruent and opposite neurons. In the model, congruent neurons in two modules are reciprocally connected with each other in the congruent manner, whereas opposite neurons are reciprocally connected in the opposite manner. Mimicking the experimental protocol, our model reproduces the characteristics of congruent and opposite neurons, and demonstrates that in each module, the sisters of congruent and opposite neurons can jointly achieve optimal multisensory information integration and segregation. This study sheds light on our understanding of how the brain implements optimal multisensory integration and segregation concurrently in a distributed manner.", "bibtex": "@inproceedings{NIPS2016_88a19961,\n author = {Zhang, Wen-Hao and Wang, He and Wong, K. Y. 
Michael and Wu, Si},\n booktitle = {Advances in Neural Information Processing Systems},\n editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett},\n pages = {},\n publisher = {Curran Associates, Inc.},\n title = {\\textquotedblleft Congruent\\textquotedblright and \\textquotedblleft Opposite\\textquotedblright Neurons: Sisters for Multisensory Integration and Segregation},\n url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/88a199611ac2b85bd3f76e8ee7e55650-Paper.pdf},\n volume = {29},\n year = {2016}\n}", "pdf": "https://papers.nips.cc/paper_files/paper/2016/file/88a199611ac2b85bd3f76e8ee7e55650-Paper.pdf", "supp": "https://papers.nips.cc/paper_files/paper/2016/file/88a199611ac2b85bd3f76e8ee7e55650-Supplemental.zip", "metadata": "https://papers.nips.cc/paper_files/paper/2016/file/88a199611ac2b85bd3f76e8ee7e55650-Metadata.json", "review": "https://papers.nips.cc/paper_files/paper/2016/file/88a199611ac2b85bd3f76e8ee7e55650-Reviews.html", "metareview": "", "pdf_size": 595295, "gs_citation": 10, "gs_cited_by_link": "https://scholar.google.com/scholar?cites=13572916926047612315&as_sdt=2005&sciodt=0,5&hl=en", "gs_version_total": 8, "aff": "Department of Physics, Hong Kong University of Science and Technology, Hong Kong + Center for the Neural Basis of Cognition, Carnegie Mellon University; Department of Physics, Hong Kong University of Science and Technology, Hong Kong; Department of Physics, Hong Kong University of Science and Technology, Hong Kong; State Key Lab of Cognitive Neuroscience and Learning, and IDG/McGovern Institute for Brain Research, Beijing Normal University, China", "aff_domain": "ust.hk;connect.ust.hk;ust.hk;bnu.edu.cn", "email": "ust.hk;connect.ust.hk;ust.hk;bnu.edu.cn", "github": "", "project": "", "author_num": 4, "oa": "https://papers.nips.cc/paper_files/paper/2016/hash/88a199611ac2b85bd3f76e8ee7e55650-Abstract.html", "aff_unique_index": "0+1;0;0;2", "aff_unique_norm": "Hong Kong University of Science and Technology;Carnegie Mellon University;Beijing Normal University", "aff_unique_dep": "Department of Physics;Center for the Neural Basis of Cognition;State Key Lab of Cognitive Neuroscience and Learning", "aff_unique_url": "https://www.ust.hk;https://www.cmu.edu;https://www.bnu.edu.cn", "aff_unique_abbr": "HKUST;CMU;BNU", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Beijing", "aff_country_unique_index": "0+1;0;0;0", "aff_country_unique": "China;United States" } ]