Publications

Below is a collection of my publications, including any preprints or technical reports. You can also check out my Google Scholar entry for a list that is more likely to be up-to-date.

Papers

Hoffman, M., Shahriari, B., Aslanides, J., Barth-Maron, G., Behbahani, F., Norman, T., Abdolmaleki, A., Cassirer, A., Yang, F., Baumli, K., Henderson, S., Novikov, A., Colmenarejo, S. G., Cabi, S., Gulcehre, C., Paine, T. L., Cowie, A., Wang, Z., Piot, B., and de Freitas, N. (2020). Acme: A Research Framework for Distributed Reinforcement Learning. arXiv:2006.00979. [pdf] [bibtex]

@misc{hoffman:2020,
  author       = {Hoffman, Matt and Shahriari, Bobak and Aslanides, John and Barth-Maron, Gabriel and Behbahani, Feryal and Norman, Tamara and Abdolmaleki, Abbas and Cassirer, Albin and Yang, Fan and Baumli, Kate and Henderson, Sarah and Novikov, Alex and Colmenarejo, Sergio G{\'o}mez and Cabi, Serkan and Gulcehre, Caglar and Paine, Tom Le and Cowie, Andrew and Wang, Ziyu and Piot, Bilal and de Freitas, Nando},
  title        = {Acme: A Research Framework for Distributed Reinforcement Learning},
  howpublished = {arXiv:2006.00979},
  year         = {2020},
  month        = jun,
  link         = {https://arxiv.org/pdf/2006.00979.pdf}
}

Gu, A., Gulcehre, C., Paine, T. L., Hoffman, M., and Pascanu, R. (2019). Improving the Gating Mechanism of Recurrent Neural Networks. arXiv:1910.09890. [pdf] [bibtex]

@misc{gu:2019,
  author       = {Gu, Albert and Gulcehre, Caglar and Paine, Tom Le and Hoffman, Matt and Pascanu, Razvan},
  title        = {Improving the Gating Mechanism of Recurrent Neural Networks},
  howpublished = {arXiv:1910.09890},
  year         = {2019},
  month        = oct,
  link         = {https://arxiv.org/pdf/1910.09890.pdf}
}

Paine, T. L., Gulcehre, C., Shahriari, B., Denil, M., Hoffman, M., Soyer, H., Tanburn, R., Kapturowski, S., Rabinowitz, N., Williams, D., Barth-Maron, G., Wang, Z., de Freitas, N., and Worlds Team (2019). Making Efficient Use of Demonstrations to Solve Hard Exploration Problems. arXiv:1909.01387. [pdf] [bibtex]

@misc{paine:2019,
  author       = {Paine, Tom Le and Gulcehre, Caglar and Shahriari, Bobak and Denil, Misha and Hoffman, Matt and Soyer, Hubert and Tanburn, Richard and Kapturowski, Steven and Rabinowitz, Neil and Williams, Duncan and Barth-Maron, Gabriel and Wang, Ziyu and de Freitas, Nando and {Worlds Team}},
  title        = {Making Efficient Use of Demonstrations to Solve Hard Exploration Problems},
  howpublished = {arXiv:1909.01387},
  year         = {2019},
  month        = sep,
  link         = {https://arxiv.org/pdf/1909.01387.pdf}
}

Shillingford, B., Assael, Y., Hoffman, M. W., Paine, T., Hughes, C., Prabhu, U., Liao, H., Sak, H., Rao, K., Bennett, L., Mulville, M., Coppin, B., Laurie, B., Senior, A., and de Freitas, N. (2019). Large-scale visual speech recognition. In INTERSPEECH. [pdf] [bibtex]

@inproceedings{shillingford:2019,
  author    = {Shillingford, Brendan and Assael, Yannis and Hoffman, Matthew W and Paine, Thomas and Hughes, C{\'i}an and Prabhu, Utsav and Liao, Hank and Sak, Hasim and Rao, Kanishka and Bennett, Lorrayne and Mulville, Marie and Coppin, Ben and Laurie, Ben and Senior, Andrew and de Freitas, Nando},
  title     = {Large-scale visual speech recognition},
  booktitle = {INTERSPEECH},
  year      = {2019},
  month     = sep,
  link      = {https://arxiv.org/pdf/1807.05162.pdf}
}

Paine, T. L., Colmenarejo, S. G., Wang, Z., Reed, S., Aytar, Y., Pfaff, T., Hoffman, M. W., Barth-Maron, G., Cabi, S., Budden, D., and de Freitas, N. (2018). One-Shot High-Fidelity Imitation: Training Large-Scale Deep Nets with RL. arXiv:1810.05017. [pdf] [bibtex]

@misc{paine:2018,
  author       = {Paine, Tom Le and Colmenarejo, Sergio G{\'o}mez and Wang, Ziyu and Reed, Scott and Aytar, Yusuf and Pfaff, Tobias and Hoffman, Matt W. and Barth-Maron, Gabriel and Cabi, Serkan and Budden, David and de Freitas, Nando},
  title        = {One-Shot High-Fidelity Imitation: Training Large-Scale Deep Nets with {RL}},
  howpublished = {arXiv:1810.05017},
  year         = {2018},
  month        = oct,
  link         = {https://arxiv.org/pdf/1810.05017.pdf}
}

Barth-Maron, G., Hoffman, M. W., Budden, D., Dabney, W., Horgan, D., TB, D., Muldal, A., Heess, N., and Lillicrap, T. (2018). Distributed Distributional Deterministic Policy Gradients. In International Conference on Learning Representations. [pdf] [bibtex]

@inproceedings{barth-maron:2018,
  author    = {Barth-Maron, Gabriel and Hoffman, Matthew W and Budden, David and Dabney, Will and Horgan, Dan and TB, Dhruva and Muldal, Alistair and Heess, Nicolas and Lillicrap, Timothy},
  title     = {Distributed Distributional Deterministic Policy Gradients},
  booktitle = {International Conference on Learning Representations},
  year      = {2018},
  month     = apr,
  link      = {https://arxiv.org/pdf/1804.08617.pdf}
}

Cabi, S., Colmenarejo, S. G., Hoffman, M. W., Denil, M., Wang, Z., and de Freitas, N. (2017). The Intentional Unintentional Agent: Learning to Solve Many Continuous Control Tasks Simultaneously. In Conference on Robot Learning. [pdf] [bibtex]

@inproceedings{cabi:2017,
  author    = {Cabi, Serkan and Colmenarejo, Sergio G{\'o}mez and Hoffman, Matthew W and Denil, Misha and Wang, Ziyu and de Freitas, Nando},
  title     = {The Intentional Unintentional Agent: Learning to Solve Many Continuous Control Tasks Simultaneously},
  booktitle = {Conference on Robot Learning},
  year      = {2017},
  month     = nov,
  link      = {https://arxiv.org/pdf/1707.03300.pdf}
}

Chen, Y., Hoffman, M. W., Colmenarejo, S. G., Denil, M., Lillicrap, T. P., Botvinick, M., and de Freitas, N. (2017). Learning to learn without gradient descent by gradient descent. In International Conference on Machine Learning. [pdf] [bibtex]

@inproceedings{chen:2017,
  author    = {Chen, Yutian and Hoffman, Matthew W and Colmenarejo, Sergio G{\'o}mez and Denil, Misha and Lillicrap, Timothy P and Botvinick, Matt and de Freitas, Nando},
  title     = {Learning to learn without gradient descent by gradient descent},
  booktitle = {International Conference on Machine Learning},
  year      = {2017},
  month     = aug,
  link      = {https://arxiv.org/pdf/1611.03824.pdf}
}

Wichrowska, O., Maheswaranathan, N., Hoffman, M. W., Colmenarejo, S. G., Denil, M., de Freitas, N., and Sohl-Dickstein, J. (2017). Learned optimizers that scale and generalize. In International Conference on Machine Learning. [pdf] [bibtex]

@inproceedings{wichrowska:2017,
  author    = {Wichrowska, Olga and Maheswaranathan, Niru and Hoffman, Matthew W and Colmenarejo, Sergio G{\'o}mez and Denil, Misha and de Freitas, Nando and Sohl-Dickstein, Jascha},
  title     = {Learned optimizers that scale and generalize},
  booktitle = {International Conference on Machine Learning},
  year      = {2017},
  month     = aug,
  link      = {https://arxiv.org/pdf/1703.04813.pdf}
}

Andrychowicz, M., Denil, M., Gomez, S., Hoffman, M. W., Pfau, D., Schaul, T., Shillingford, B., and de Freitas, N. (2016). Learning to learn by gradient descent by gradient descent. In Neural Information Processing Systems. [pdf] [bibtex]

@inproceedings{andrychowicz:2016,
  author    = {Andrychowicz, Marcin and Denil, Misha and Gomez, Sergio and Hoffman, Matthew W. and Pfau, David and Schaul, Tom and Shillingford, Brendan and de Freitas, Nando},
  title     = {Learning to learn by gradient descent by gradient descent},
  booktitle = {Neural Information Processing Systems},
  year      = {2016},
  month     = dec,
  link      = {https://arxiv.org/pdf/1606.04474.pdf}
}

Hernández-Lobato, J. M., Gelbart, M. A., Adams, R. P., Hoffman, M. W., and Ghahramani, Z. (2016). A general framework for constrained Bayesian optimization using information-based search. Journal of Machine Learning Research, 17. [pdf] [bibtex]

@article{hernandez-lobato:2016,
  author  = {Hern{\'a}ndez-Lobato, Jos{\'e} Miguel and Gelbart, Michael A and Adams, Ryan P and Hoffman, Matthew W and Ghahramani, Zoubin},
  title   = {A general framework for constrained {Bayesian} optimization using information-based search},
  journal = {Journal of Machine Learning Research},
  volume  = {17},
  year    = {2016},
  month   = jun,
  link    = {https://arxiv.org/pdf/1511.09422.pdf}
}

Hoffman, M. W., and Ghahramani, Z. (2015). Output-Space Predictive Entropy Search for Flexible Global Optimization. In NIPS workshop on Bayesian optimization. [pdf] [bibtex]

@inproceedings{hoffman:2015,
  author    = {Hoffman, Matthew W. and Ghahramani, Zoubin},
  title     = {Output-Space Predictive Entropy Search for Flexible Global Optimization},
  booktitle = {NIPS workshop on Bayesian optimization},
  year      = {2015},
  month     = dec
}

Hernández-Lobato, J. M., Gelbart, M. A., Hoffman, M. W., Adams, R. P., and Ghahramani, Z. (2015). Predictive Entropy Search for Bayesian Optimization with Unknown Constraints. In International Conference on Machine Learning. [pdf] [bibtex]

@inproceedings{hernandez-lobato:2015,
  author    = {Hern{\'a}ndez-Lobato, Jos{\'e} Miguel and Gelbart, Michael A. and Hoffman, Matthew W. and Adams, Ryan P. and Ghahramani, Zoubin},
  title     = {Predictive Entropy Search for {Bayesian} Optimization with Unknown Constraints},
  booktitle = {International Conference on Machine Learning},
  year      = {2015},
  month     = jul,
  link      = {https://arxiv.org/pdf/1502.05312.pdf}
}

Shahriari, B., Wang, Z., Hoffman, M. W., Bouchard-Côté, A., and de Freitas, N. (2015). An Entropy Search Portfolio for Bayesian Optimization. arXiv:1406.4625. [pdf] [bibtex]

@misc{shahriari:2015,
  author       = {Shahriari, Bobak and Wang, Ziyu and Hoffman, Matthew W. and Bouchard-C{\^o}t{\'e}, Alexandre and de Freitas, Nando},
  title        = {An Entropy Search Portfolio for {Bayesian} Optimization},
  howpublished = {arXiv:1406.4625},
  year         = {2015},
  link         = {http://arxiv.org/pdf/1406.4625.pdf}
}

Hoffman, M. W., and Shahriari, B. (2014). Modular mechanisms for Bayesian optimization. In NIPS workshop on Bayesian optimization. [pdf] [bibtex]

@inproceedings{hoffman:2014b,
  author    = {Hoffman, Matthew W. and Shahriari, Bobak},
  title     = {Modular mechanisms for {Bayesian} optimization},
  booktitle = {NIPS workshop on Bayesian optimization},
  year      = {2014},
  month     = dec
}

Hernández-Lobato, J. M., Hoffman, M. W., and Ghahramani, Z. (2014). Predictive Entropy Search for Efficient Global Optimization of Black-box Functions. In Neural Information Processing Systems. [pdf] [bibtex]

@inproceedings{hernandez-lobato:2014,
  author    = {Hern{\'a}ndez-Lobato, Jos{\'e} Miguel and Hoffman, Matthew W. and Ghahramani, Zoubin},
  title     = {Predictive Entropy Search for Efficient Global Optimization of Black-box Functions},
  booktitle = {Neural Information Processing Systems},
  year      = {2014},
  month     = dec,
  link      = {https://arxiv.org/pdf/1406.2541}
}

Hoffman, M. W., Shahriari, B., and de Freitas, N. (2014). On correlation and budget constraints in model-based bandit optimization with application to automatic machine learning. In International Conference on Artificial Intelligence and Statistics. [pdf] [bibtex]

@inproceedings{hoffman:2014,
  author    = {Hoffman, Matthew W and Shahriari, Bobak and de Freitas, Nando},
  title     = {On correlation and budget constraints in model-based bandit optimization with application to automatic machine learning},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  year      = {2014},
  month     = apr
}

Hoffman, M. W., and de Freitas, N. (2012). Inference strategies for solving semi-Markov decision processes. In L. E. Sucar, E. F. Morales, and J. Hoey (Eds.), Decision Theory Models for Applications in Artificial Intelligence: Concepts and Solutions. IGI Global. [pdf] [bibtex]

@incollection{hoffman:2012a,
  author    = {Hoffman, Matthew W. and de Freitas, Nando},
  title     = {Inference strategies for solving semi-{Markov} decision processes},
  editor    = {Sucar, L. Enrique and Morales, Eduardo F. and Hoey, Jesse},
  booktitle = {Decision Theory Models for Applications in Artificial Intelligence: Concepts and Solutions},
  publisher = {IGI Global},
  year      = {2012}
}

Hoffman, M. W., Lazaric, A., Ghavamzadeh, M., and Munos, R. (2012). Regularized Least Squares Temporal Difference Learning with Nested $\ell_2$ and $\ell_1$ Penalization. In European Workshop on Reinforcement Learning. [pdf] [bibtex]

@inproceedings{hoffman:2012b,
  author    = {Hoffman, Matthew W and Lazaric, Alessandro and Ghavamzadeh, Mohammad and Munos, R{\'e}mi},
  title     = {Regularized Least Squares Temporal Difference Learning with Nested {$\ell_2$} and {$\ell_1$} Penalization},
  booktitle = {European Workshop on Reinforcement Learning},
  series    = {Recent Advances in Machine Learning},
  year      = {2012}
}

Ghavamzadeh, M., Lazaric, A., Hoffman, M. W., and Munos, R. (2011). Finite-Sample Analysis of Lasso-TD. In International Conference on Machine Learning. [pdf] [bibtex]

@inproceedings{ghavamzadeh:2011,
  author    = {Ghavamzadeh, Mohammad and Lazaric, Alessandro and Hoffman, Matthew W. and Munos, R{\'e}mi},
  title     = {Finite-Sample Analysis of {Lasso-TD}},
  booktitle = {International Conference on Machine Learning},
  year      = {2011}
}

Hoffman, M. W., Brochu, E., and de Freitas, N. (2011). Portfolio Allocation for Bayesian Optimization. In Uncertainty in Artificial Intelligence. [pdf] [bibtex]

@inproceedings{hoffman:2011,
  author    = {Hoffman, Matthew W and Brochu, Eric and de Freitas, Nando},
  title     = {Portfolio Allocation for {Bayesian} Optimization},
  booktitle = {Uncertainty in Artificial Intelligence},
  year      = {2011}
}

Hoffman, M. W., Kueck, H., de Freitas, N., and Doucet, A. (2009). New inference strategies for solving Markov decision processes using reversible jump MCMC. In Uncertainty in Artificial Intelligence. [pdf] [bibtex]

@inproceedings{hoffman:2009b,
  author    = {Hoffman, Matthew W and Kueck, Hendrik and de Freitas, Nando and Doucet, Arnaud},
  title     = {New inference strategies for solving {Markov} decision processes using reversible jump {MCMC}},
  booktitle = {Uncertainty in Artificial Intelligence},
  year      = {2009}
}

Hoffman, M. W., de Freitas, N., Doucet, A., and Peters, J. (2009). An Expectation Maximization algorithm for continuous Markov Decision Processes with arbitrary reward. In International Conference on Artificial Intelligence and Statistics. [pdf] [code] [bibtex]

@inproceedings{hoffman:2009a,
  author    = {Hoffman, Matthew W. and de Freitas, Nando and Doucet, Arnaud and Peters, Jan},
  title     = {An {Expectation Maximization} algorithm for continuous {Markov} Decision Processes with arbitrary reward},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  year      = {2009},
  code      = {https://github.com/mwhoffman/mogmdp}
}

Kueck, H., Hoffman, M. W., Doucet, A., and de Freitas, N. (2009). Inference and Learning for Active Sensing, Experimental Design and Control. In Iberian Conference on Pattern Recognition and Image Analysis. [pdf] [bibtex]

@inproceedings{kueck:2009,
  author    = {Kueck, Hendrik and Hoffman, Matthew W. and Doucet, Arnaud and de Freitas, Nando},
  title     = {Inference and Learning for Active Sensing, Experimental Design and Control},
  booktitle = {Iberian Conference on Pattern Recognition and Image Analysis},
  year      = {2009}
}

Hoffman, M. W., Doucet, A., de Freitas, N., and Jasra, A. (2007). Bayesian policy learning with trans-dimensional MCMC. In Neural Information Processing Systems. [pdf] [bibtex]

@inproceedings{hoffman:2007,
  author    = {Hoffman, Matthew W. and Doucet, Arnaud and de Freitas, Nando and Jasra, Ajay},
  title     = {Bayesian policy learning with trans-dimensional {MCMC}},
  booktitle = {Neural Information Processing Systems},
  year      = {2007}
}

Hoffman, M. W., Doucet, A., de Freitas, N., and Jasra, A. (2007). On solving general state-space sequential decision problems using inference algorithms (No. TR-2007-04). University of British Columbia, Computer Science. [pdf] [bibtex]

@techreport{hoffman:2007a,
  author      = {Hoffman, Matthew W. and Doucet, Arnaud and de Freitas, Nando and Jasra, Ajay},
  title       = {On solving general state-space sequential decision problems using inference algorithms},
  institution = {University of British Columbia, Computer Science},
  number      = {TR-2007-04},
  year        = {2007}
}

Hoffman, M. W., Grimes, D. B., Shon, A. P., and Rao, R. P. N. (2006). A probabilistic model of gaze imitation and shared attention. Neural Networks, 19. [pdf] [bibtex]

@article{hoffman:2006,
  author  = {Hoffman, Matthew W. and Grimes, David B. and Shon, Aaron P. and Rao, Rajesh P.~N.},
  title   = {A probabilistic model of gaze imitation and shared attention},
  journal = {Neural Networks},
  volume  = {19},
  year    = {2006}
}

Shon, A. P., Grimes, D. B., Baker, C. L., Hoffman, M. W., Zhou, S., and Rao, R. P. N. (2005). Probabilistic gaze imitation and saliency learning in a robotic head. In International Conference on Robotics and Automation. [pdf] [bibtex]

@inproceedings{shon:2005,
  author    = {Shon, Aaron P and Grimes, David B and Baker, Chris L and Hoffman, Matthew W and Zhou, Shengli and Rao, Rajesh P. N.},
  title     = {Probabilistic gaze imitation and saliency learning in a robotic head},
  booktitle = {International Conference on Robotics and Automation},
  year      = {2005}
}

Thesis

Hoffman, M. W. (2013). Decision making with inference and learning methods (PhD thesis). University of British Columbia. [pdf] [bibtex]

@phdthesis{hoffman:2013:thesis,
  author = {Hoffman, Matthew W},
  title  = {Decision making with inference and learning methods},
  school = {University of British Columbia},
  year   = {2013}
}