Kamil Ciosek

Machine Learning Specialist

kamil.ciosek@posteo.net

My photograph
About Me

I am a machine learning (ML) researcher with a focus on reinforcement learning (RL). The kind of practical problem that I find most interesting is making decisions on the basis of finite data. Bandits, MDPs and their generalizations (POMDPs, games) are my main modeling tools and I am interested in solving them in novel and useful ways. In particular, I believe that finding the right ways to quantify uncertainty in complex deep RL models is one of the most promising approaches to improving the sample efficiency of synthetic controllers. I am also interested in meta learning and imitation learning.

Publications
  1. Rashid, T., Zhang, C., & Ciosek, K. (2021). Estimating α-Rank by Maximizing Information Gain. AAAI 2021. pdf
    BibTeX
    @inproceedings{rashid2021alpha,
      title = {Estimating {$\alpha$-Rank} by Maximizing Information Gain},
      author = {Rashid, Tabish and Zhang, Cheng and Ciosek, Kamil},
      booktitle = {AAAI Conference on Artificial Intelligence},
      year = {2021},
      pdf = {https://arxiv.org/abs/2101.09178}
    }
                    
  2. Li, J., Vuong, Q., Liu, S., Liu, M., Ciosek, K., Christensen, H., & Su, H. (2020). Multi-task Batch Reinforcement Learning with Metric Learning. NeurIPS. pdf
    BibTeX
    @inproceedings{multitask-batch-rl,
      title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
      author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
      booktitle = {NeurIPS},
      year      = {2020},
      pdf       = {https://arxiv.org/pdf/1909.11373.pdf},
    }
    
  3. Beck, J., Ciosek, K., Devlin, S., Tschiatschek, S., Zhang, C., & Hofmann, K. (2020). AMRL: Aggregated Memory for Reinforcement Learning. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{beck2020amrl,
      title = {{AMRL}: {Aggregated} {Memory} for {Reinforcement} {Learning}},
      author = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
      booktitle = {International Conference on Learning Representations},
      year = {2020},
      pdf = {https://openreview.net/pdf?id=Bkl7bREtDr}
    }
    
  4. Ciosek, K., Fortuin, V., Tomioka, R., Hofmann, K., & Turner, R. (2020). Conservative Uncertainty Estimation By Fitting Prior Networks. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{ciosek2020conservative,
      title = {{Conservative} {Uncertainty} {Estimation} By {Fitting} {Prior} {Networks}},
      author = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
      booktitle = {International Conference on Learning Representations},
      year = {2020},
      pdf = {https://openreview.net/pdf?id=BJlahxHYDS}
    }
    
  5. Amit, R., Meir, R., & Ciosek, K. (2020). Discount Factor as a Regularizer in Reinforcement Learning. ICML. pdf
    BibTeX
    @inproceedings{amit2020discount,
      author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
      title     = {Discount Factor as a Regularizer in Reinforcement Learning},
      booktitle = {International Conference on Machine Learning (ICML)},
      year      = {2020},
      pages     = {2760--2769},
    }
    
  6. Ciosek, K., & Whiteson, S. (2020). Expected Policy Gradients for Reinforcement Learning. Journal of Machine Learning Research, 21(52), 1–51. Submitted in January 2018 pdf
    BibTeX
    @article{epg-journal,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      title = {Expected Policy Gradients for Reinforcement Learning},
      journal = {Journal of Machine Learning Research},
      year = {2020},
      volume = {21},
      number = {52},
      pages = {1--51},
      pdf = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
      note = {Submitted in January 2018}
    }
    
  7. Igl, M., Ciosek, K., Li, Y., Tschiatschek, S., Zhang, C., Devlin, S., & Hofmann, K. (2019). Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck. NeurIPS. pdf
    BibTeX
    @inproceedings{sni-rl,
      author = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
      title = {{Generalization} in {Reinforcement} {Learning} with {Selective} {Noise} {Injection} and {Information} {Bottleneck}},
      booktitle = {NeurIPS},
      year = {2019},
      pdf = {https://arxiv.org/pdf/1910.12911}
    }
    
  8. Ciosek, K., Vuong, Q., Loftin, R., & Hofmann, K. (2019). Better Exploration with Optimistic Actor-Critic. NeurIPS. pdf
    BibTeX
    @inproceedings{oac-pg,
      title     = {Better Exploration with Optimistic Actor-Critic},
      author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
      booktitle = {NeurIPS},
      year      = {2019},
      pdf       = {https://arxiv.org/pdf/1910.12807},
    }
    
  9. Fellows, M., Ciosek, K., & Whiteson, S. (2018). Fourier Policy Gradients. ICML. pdf
    BibTeX
    @inproceedings{fourier-pg,
      author = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
      booktitle = {ICML},
      date-added = {2018-02-25 15:17:14 +0000},
      date-modified = {2018-06-04 12:39:30 +0000},
      eprint = {1802.06891},
      archiveprefix = {arXiv},
      pdf = {https://arxiv.org/pdf/1802.06891},
      title = {{Fourier} {Policy} {Gradients}},
      year = {2018}
    }
    
  10. Ciosek, K., & Whiteson, S. (2018). Expected Policy Gradients. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{epg-aaai,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      date-added = {2017-12-14 15:15:13 +0000},
      date-modified = {2017-12-14 16:18:43 +0000},
      booktitle = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
      pdf = {./dwnl/ciosek-whiteson-epg.pdf},
      title = {{Expected} {Policy} {Gradients}},
      year = {2018}
    }
    
  11. Paul, S., Chatzilygeroudis, K., Ciosek, K., Mouret, J.-B., Osborne, M. A., & Whiteson, S. (2018). Alternating Optimisation and Quadrature for Robust Control. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{aloq,
      author = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
      date-added = {2017-12-14 15:17:17 +0000},
      date-modified = {2017-12-14 16:10:14 +0000},
      booktitle = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
      pdf = {./dwnl/aloq.pdf},
      title = {{Alternating} {Optimisation} and {Quadrature} for {Robust} {Control}},
      year = {2018}
    }
    
  12. Ciosek, K., & Whiteson, S. (2017). OFFER: Off-Environment Reinforcement Learning. The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17). pdf
    BibTeX
    @inproceedings{offer,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      date-added = {2017-12-14 15:17:29 +0000},
      date-modified = {2017-12-14 16:18:23 +0000},
      booktitle = {The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)},
      pdf = {./dwnl/offer.pdf},
      title = {{OFFER}: {Off-Environment} {Reinforcement} {Learning}},
      year = {2017}
    }
    
  13. Ciosek, K., & Whiteson, S. (2016). Off-Environment RL with Rare Events. NIPS Workshop on Optimizing the Optimizers. pdf
    BibTeX
    @inproceedings{off-env-wk,
      author = {Ciosek, Kamil and Whiteson, Shimon},
      date-added = {2017-12-14 16:17:37 +0000},
      date-modified = {2017-12-14 16:18:08 +0000},
      booktitle = {NIPS Workshop on Optimizing the Optimizers},
      pdf = {./dwnl/offer.pdf},
      title = {{Off-Environment} {RL} with {Rare} {Events}},
      year = {2016}
    }
    
  14. Ciosek, K., & Silver, D. (2015). Value Iteration with Options and State Aggregation. In Proceedings of the 5th Workshop on Planning and Learning, ICAPS. pdf
    BibTeX
    @inproceedings{opt-aggr,
      author = {Ciosek, Kamil and Silver, David},
      date-added = {2017-12-14 15:17:49 +0000},
      date-modified = {2017-12-14 16:19:13 +0000},
      booktitle = {Proceedings of the 5th Workshop on Planning and Learning, ICAPS},
      month = jun,
      pdf = {./dwnl/opt-aggr.pdf},
      title = {{Value} {Iteration} with {Options} and {State} {Aggregation}},
      year = {2015}
    }
    
  15. Ciosek, K. (2015). Linear Reinforcement Learning with Options. [Ph. D. thesis]. University College London. pdf
    BibTeX
    @phdthesis{ciosek-thesis,
      author = {Ciosek, Kamil},
      date-added = {2017-12-14 15:20:33 +0000},
      date-modified = {2017-12-14 15:34:57 +0000},
      pdf = {./dwnl/phd-thesis.pdf},
      school = {University College London},
      title = {{Linear} {Reinforcement} {Learning} with {Options}},
      type = {{P}h.\ {D}.\ thesis},
      year = {2015}
    }
    
  16. Silver, D., & Ciosek, K. (2012). Compositional Planning Using Optimal Option Models. ICML. pdf
    BibTeX
    @inproceedings{silver-ciosek-options,
      author = {Silver, David and Ciosek, Kamil},
      booktitle = {ICML},
      date-added = {2017-12-14 15:24:19 +0000},
      date-modified = {2017-12-14 16:19:37 +0000},
      pdf = {./dwnl/composition.pdf},
      title = {{Compositional} {Planning} {Using} {Optimal} {Option} {Models}},
      year = {2012}
    }
    
  17. Ciosek, K., & Kotowski, P. (2009). Generating 3D Plants using Lindenmayer System. GRAPP, 76–81. pdf
    BibTeX
    @inproceedings{lindenmayer-plants,
      author = {Ciosek, Kamil and Kotowski, Pawe{\l}},
      booktitle = {GRAPP},
      date-added = {2017-12-14 15:24:28 +0000},
      date-modified = {2017-12-14 16:19:49 +0000},
      pages = {76--81},
      pdf = {./dwnl/lindenmayer.pdf},
      title = {{Generating} {3D} {Plants} using {Lindenmayer} {System}},
      year = {2009}
    }