Kamil Ciosek

Machine Learning Specialist

kamil.ciosek@posteo.net

My photograph
About Me

I am a machine learning (ML) researcher with a focus on reinforcement learning (RL). The kind of practical problem that I find most interesting is making decisions on the basis of finite data. Bandits, MDPs and their generalizations (POMDPs, games) are my main modeling tools and I am interested in solving them in novel and useful ways. In particular, I believe that finding the right ways to quantify uncertainty in complex deep RL models is one of the most promising approaches to improving the sample efficiency of synthetic controllers. I am also interested in meta learning and imitation learning. An important part of my research is motivated by applying RL to computer games.

Publications
  1. Li, J., Vuong, Q., Liu, S., Liu, M., Ciosek, K., Christensen, H., & Su, H. (2020). Multi-task Batch Reinforcement Learning with Metric Learning. NeurIPS. pdf
    BibTeX
    @inproceedings{multitask-batch-rl,
      author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
      title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
      booktitle = {NeurIPS},
      year      = {2020},
      pdf       = {https://arxiv.org/pdf/1909.11373.pdf}
    }
    
  2. Beck, J., Ciosek, K., Devlin, S., Tschiatschek, S., Zhang, C., & Hofmann, K. (2020). AMRL: Aggregated Memory for Reinforcement Learning. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{beck2020amrl,
      author    = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
      title     = {{AMRL}: {A}ggregated {M}emory for {R}einforcement {L}earning},
      booktitle = {International Conference on Learning Representations},
      year      = {2020},
      pdf       = {https://openreview.net/pdf?id=Bkl7bREtDr}
    }
    
  3. Ciosek, K., Fortuin, V., Tomioka, R., Hofmann, K., & Turner, R. (2020). Conservative Uncertainty Estimation By Fitting Prior Networks. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{ciosek2020conservative,
      title = {{Conservative} {Uncertainty} {Estimation} By {Fitting} {Prior} {Networks}},
      author = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
      booktitle = {International Conference on Learning Representations},
      year = {2020},
      pdf = {https://openreview.net/pdf?id=BJlahxHYDS}
    }
    
  4. R. Amit, R. Meir, K. Ciosek, “Discount Factor as a Regularizer in Reinforcement Learning” ICML 2020. pdf
    BibTeX
    @inproceedings{amit2020discount,
      author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
      title     = {Discount Factor as a Regularizer in Reinforcement Learning},
      booktitle = {International Conference on Machine Learning (ICML)},
      year      = {2020},
      pages     = {2760--2769}
    }
    
  5. Ciosek, K., & Whiteson, S. (2020). Expected Policy Gradients for Reinforcement Learning. Journal of Machine Learning Research, 21(52), 1–51. Submitted in January 2018 pdf
    BibTeX
    @article{epg-journal,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      title = {Expected Policy Gradients for Reinforcement Learning},
      journal = {Journal of Machine Learning Research},
      year = {2020},
      volume = {21},
      number = {52},
      pages = {1--51},
      pdf = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
      note = {Submitted in January 2018}
    }
    
  6. Igl, M., Ciosek, K., Li, Y., Tschiatschek, S., Zhang, C., Devlin, S., & Hofmann, K. (2019). Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck. NeurIPS. pdf
    BibTeX
    @inproceedings{sni-rl,
      author    = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
      title     = {{G}eneralization in {R}einforcement {L}earning with {S}elective {N}oise {I}njection
                     and {I}nformation {B}ottleneck},
      booktitle = {NeurIPS},
      year      = {2019},
      pdf       = {https://arxiv.org/pdf/1910.12911}
    }
    
  7. Ciosek, K., Vuong, Q., Loftin, R., & Hofmann, K. (2019). Better Exploration with Optimistic Actor-Critic. NeurIPS. pdf
    BibTeX
    @inproceedings{oac-pg,
      author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
      title     = {Better Exploration with Optimistic Actor-Critic},
      booktitle = {NeurIPS},
      year      = {2019},
      pdf       = {https://arxiv.org/pdf/1910.12807}
    }
    
  8. Fellows, M., Ciosek, K., & Whiteson, S. (2018). Fourier Policy Gradients. ICML. pdf
    BibTeX
    @inproceedings{fourier-pg,
      author = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
      title = {{F}ourier {P}olicy {G}radients},
      booktitle = {ICML},
      year = {2018},
      eprint = {1802.06891},
      archiveprefix = {arXiv},
      pdf = {https://arxiv.org/pdf/1802.06891},
      date-added = {2018-02-25 15:17:14 +0000},
      date-modified = {2018-06-04 12:39:30 +0000}
    }
    
  9. Ciosek, K., & Whiteson, S. (2018). Expected Policy Gradients. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{epg-aaai,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      title = {{E}xpected {P}olicy {G}radients},
      booktitle = {The Thirty-Second {AAAI} Conference on Artificial Intelligence ({AAAI}-18)},
      year = {2018},
      pdf = {./dwnl/ciosek-whiteson-epg.pdf},
      date-added = {2017-12-14 15:15:13 +0000},
      date-modified = {2017-12-14 16:18:43 +0000}
    }
    
  10. Paul, S., Chatzilygeroudis, K., Ciosek, K., Mouret, J.-B., Osborne, M. A., & Whiteson, S. (2018). Alternating Optimisation and Quadrature for Robust Control. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{aloq,
      author = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
      title = {{A}lternating {O}ptimisation and {Q}uadrature for {R}obust {C}ontrol},
      booktitle = {The Thirty-Second {AAAI} Conference on Artificial Intelligence ({AAAI}-18)},
      year = {2018},
      pdf = {./dwnl/aloq.pdf},
      date-added = {2017-12-14 15:17:17 +0000},
      date-modified = {2017-12-14 16:10:14 +0000}
    }
    
  11. Ciosek, K., & Whiteson, S. (2017). OFFER: Off-Environment Reinforcement Learning. The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17). pdf
    BibTeX
    @inproceedings{offer,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      title = {{OFFER}: {Off-Environment} {Reinforcement} {Learning}},
      booktitle = {The Thirty-First {AAAI} Conference on Artificial Intelligence ({AAAI}-17)},
      year = {2017},
      pdf = {./dwnl/offer.pdf},
      date-added = {2017-12-14 15:17:29 +0000},
      date-modified = {2017-12-14 16:18:23 +0000}
    }
    
  12. Ciosek, K., & Whiteson, S. (2016). Off-Environment RL with Rare Events. NIPS Workshop on Optimizing the Optimizers. pdf
    BibTeX
    @inproceedings{off-env-wk,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      title = {{O}ff-{E}nvironment {RL} with {R}are {E}vents},
      booktitle = {{NIPS} Workshop on Optimizing the Optimizers},
      year = {2016},
      pdf = {./dwnl/offer.pdf},
      date-added = {2017-12-14 16:17:37 +0000},
      date-modified = {2017-12-14 16:18:08 +0000}
    }
    
  13. Ciosek, K., & Silver, D. (2015). Value Iteration with Options and State Aggregation. In Proceedings of the 5th Workshop on Planning and Learning, ICAPS. pdf
    BibTeX
    @inproceedings{opt-aggr,
      author = {Ciosek, Kamil and Silver, David},
      title = {{Value} {Iteration} with {Options} and {State} {Aggregation}},
      booktitle = {Proceedings of the 5th Workshop on Planning and Learning, {ICAPS}},
      year = {2015},
      month = jun,
      pdf = {./dwnl/opt-aggr.pdf},
      date-added = {2017-12-14 15:17:49 +0000},
      date-modified = {2017-12-14 16:19:13 +0000}
    }
    
  14. Ciosek, K. (2015). Linear Reinforcement Learning with Options. [Ph. D. thesis]. University College London. pdf
    BibTeX
    @phdthesis{ciosek-thesis,
      author = {Ciosek, Kamil},
      title = {{Linear} {Reinforcement} {Learning} with {Options}},
      school = {University College London},
      type = {{P}h.\ {D}.\ thesis},
      year = {2015},
      pdf = {./dwnl/phd-thesis.pdf},
      date-added = {2017-12-14 15:20:33 +0000},
      date-modified = {2017-12-14 15:34:57 +0000}
    }
    
  15. Silver, D., & Ciosek, K. (2012). Compositional Planning Using Optimal Option Models. ICML. pdf
    BibTeX
    @inproceedings{silver-ciosek-options,
      author        = {Silver, David and Ciosek, Kamil},
      title         = {{C}ompositional {P}lanning {U}sing {O}ptimal {O}ption {M}odels},
      booktitle     = {ICML},
      year          = {2012},
      pdf           = {./dwnl/composition.pdf},
      date-added    = {2017-12-14 15:24:19 +0000},
      date-modified = {2017-12-14 16:19:37 +0000}
    }
    
  16. Ciosek, K., & Kotowski, P. (2009). Generating 3D Plants using Lindenmayer System. GRAPP, 76–81. pdf
    BibTeX
    @inproceedings{lindenmayer-plants,
      author = {Ciosek, Kamil and Kotowski, Pawe{\l}},
      title = {{Generating} {3D} {Plants} using {Lindenmayer} {System}},
      booktitle = {GRAPP},
      year = {2009},
      pages = {76--81},
      pdf = {./dwnl/lindenmayer.pdf},
      date-added = {2017-12-14 15:24:28 +0000},
      date-modified = {2017-12-14 16:19:49 +0000}
    }