Kamil Ciosek

Machine Learning Researcher

kamil.ciosek@posteo.net

My photograph
About Me

I am a machine learning (ML) researcher with a focus on reinforcement learning (RL). The kind of practical problem that I find most interesting is making decisions on the basis of finite data. Bandits, MDPs and their generalizations (POMDPs, games) are my main modeling tools and I am interested in solving them in novel and useful ways. In particular, I believe that finding the right ways to quantify uncertainty in complex deep RL models is one of the most promising approaches to improving the sample efficiency of synthetic controllers. I am also interested in imitation learning.

Publications
  1. McDonald, T., Maystre, L., Lalmas, M., Russo, D., Ciosek, K. Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay KDD 2023. pdf
    BibTeX
    @inproceedings{tslt,
      author    = {McDonald, Thomas and Maystre, Lucas and Lalmas, Mounia and Russo, Daniel and Ciosek, Kamil},
      title     = {Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay},
      booktitle = {KDD},
      year      = {2023}
    }
                        
  2. Tomasi, F., Cauteruccio, J., Kanoria, S., Ciosek, K., Rinaldi, M., Dai, Z. Automatic Music Playlist Generation via Simulation-based Reinforcement Learning KDD 2023. pdf
    BibTeX
    @inproceedings{mbrlp,
      author    = {Tomasi, Federico and Cauteruccio, Joe and Kanoria, Surya and Ciosek, Kamil and Rinaldi, Matteo and Dai, Zhenwen},
      title     = { Automatic Music Playlist Generation via Simulation-based Reinforcement Learning },
      booktitle = {KDD},
      year      = {2023}
    }
                        
  3. Ciosek, K. Imitation Learning by Reinforcement Learning ICLR 2022. pdf code
    BibTeX
    @inproceedings{ilr,
      author    = {Ciosek, Kamil},
      title     = { Imitation Learning by Reinforcement Learning },
      booktitle = {ICLR},
      year      = {2022},
      pdf       = { https://openreview.net/pdf?id=1zwleytEpYx }
    }
                        
  4. Lindner, D., Turchetta, M., Tschiatschek, S., Ciosek, K., Krause, A. (2021) Information Directed Reward Learning for Reinforcement Learning NeurIPS 2021. pdf
    BibTeX
    @inproceedings{idRL,
      author    = {Lindner, David and Turchetta, Matteo and Tschiatschek, Sebastian and Ciosek, Kamil and Krause, Andreas},
      title     = { Information Directed Reward Learning for Reinforcement Learning },
      booktitle = {NeurIPS},
      year      = {2021},
      pdf       = { https://openreview.net/pdf?id=t5-Mszu1UkO }
    }
      
  5. Husain, H., Ciosek, K., Tomioka, R. (2021) Regularized Policies are Reward Robust AISTATS 2021. pdf
    BibTeX
    @inproceedings{huseinPolicies,
      author    = {Husain, Hisham and Ciosek, Kamil and Tomioka, Ryota},
      title     = { Regularized Policies are Reward Robust },
      booktitle = {AISTATS},
      year      = {2021},
      pdf       = { http://proceedings.mlr.press/v130/husain21a/husain21a.pdf }
    }
        
  6. Zintgraf, L., Devlin, S., Ciosek, K., Whiteson, S., Hofmann, K. (2021) Deep Interactive Bayesian Reinforcement Learning via Meta-Learning AAMAS 2021. pdf
    BibTeX
    @inproceedings{bayesianRLMeta,
      author    = {Zintgraf, Luisa and Devlin, Sam and Ciosek, Kamil and Whiteson, Shimon and Hofmann, Katja },
      title     = { Deep Interactive Bayesian Reinforcement Learning via Meta-Learning},
      booktitle = {AAMAS},
      year      = {2021},
      pdf       = { https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1712.pdf }
    }
    
  7. Knott, P., Carroll, M., Devlin, S., Ciosek, K., Hofmann, K., Dragan, A., Shah, R. (2021) Evaluating the Robustness of Collaborative Agents. AAMAS 2021. pdf
    BibTeX
    @inproceedings{knottRobustness,
      author    = {Knott, Paul and Carroll, Micah and Devlin, Sam and Ciosek, Kamil and Hofmann, Katja and Dragan, Anca and Shah, Rohin},
      title     = {Evaluating the Robustness of Collaborative Agents},
      booktitle = {AAMAS},
      year      = {2021},
      pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1560.pdf}
    }
    
  8. Rashid, T., Zhang, C., Ciosek, K. (2021). Estimating α-Rank by Maximizing Information Gain AAAI 2021. pdf
    BibTeX
    @inproceedings{rashid2021alpha,
      author    = {Rashid, Tabish and Zhang, Cheng and Ciosek, Kamil},
      title     = {Estimating alpha-Rank by Maximizing Information Gain},
      booktitle = {AAAI Conference on Artificial Intelligence},
      year      = {2021},
      pdf       = {https://arxiv.org/abs/2101.09178}
    }
    
  9. Li, J., Vuong, Q., Liu, S., Liu, M., Ciosek, K., Christensen, H., & Su, H. (2020). Multi-task Batch Reinforcement Learning with Metric Learning. NeurIPS. pdf
    BibTeX
    @inproceedings{multitask-batch-rl,
        title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
        author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
        booktitle = {NeurIPS},
        pdf       = {https://arxiv.org/pdf/1909.11373.pdf},
        year      = {2020}
    }
    
  10. Beck, J., Ciosek, K., Devlin, S., Tschiatschek, S., Zhang, C., & Hofmann, K. (2020). AMRL: Aggregated Memory for Reinforcement Learning. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{beck2020amrl,
        author    = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
        title     = {{AMRL}: {A}ggregated {M}emory for {R}einforcement {L}earning},
        booktitle = {International Conference on Learning Representations},
        pdf       = {https://openreview.net/pdf?id=Bkl7bREtDr},
        year      = {2020}
    }
    
  11. Ciosek, K., Fortuin, V., Tomioka, R., Hofmann, K., & Turner, R. (2020). Conservative Uncertainty Estimation By Fitting Prior Networks. International Conference on Learning Representations. pdf
    BibTeX
    @inproceedings{ciosek2020conservative,
        author    = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
        title     = {{C}onservative {U}ncertainty {E}stimation By {F}itting  {P}rior {N}etworks},
        booktitle = {International Conference on Learning Representations},
        pdf       = {https://openreview.net/pdf?id=BJlahxHYDS},
        year      = {2020}
    }
    
  12. R. Amit, R. Meir, K. Ciosek, “Discount Factor as a Regularizer in Reinforcement Learning” ICML 2020. pdf
    BibTeX
    @inproceedings{amit2020discount,
        author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
        title     = {Discount Factor as a Regularizer in Reinforcement Learning},
        booktitle = {International Conference on Machine Learning (ICML)},
        year      = {2020},
        pages     = {2760--2769}
    }
    
  13. Ciosek, K., & Whiteson, S. (2020). Expected Policy Gradients for Reinforcement Learning. Journal of Machine Learning Research, 21(52), 1–51. Submitted in January 2018 pdf
    BibTeX
    @article{epg-journal,
      author  = {Ciosek, Kamil and Whiteson, Shimon},
      title   = {Expected Policy Gradients for Reinforcement Learning},
      journal = {Journal of Machine Learning Research},
      year    = {2020},
      volume  = {21},
      number  = {52},
      pages   = {1--51},
      pdf     = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
      note    = {Submitted in January 2018}
    }
    
  14. Igl, M., Ciosek, K., Li, Y., Tschiatschek, S., Zhang, C., Devlin, S., & Hofmann, K. (2019). Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck. NeurIPS. pdf
    BibTeX
    @inproceedings{sni-rl,
        title = {{G}eneralization in {R}einforcement {L}earning with {S}elective {N}oise {I}njection
                     and {I}nformation {B}ottleneck},
        author    = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
        booktitle = {NeurIPS},
        pdf       = {https://arxiv.org/pdf/1910.12911},
        year      = {2019}
    }
    
  15. Ciosek, K., Vuong, Q., Loftin, R., & Hofmann, K. (2019). Better Exploration with Optimistic Actor-Critic. NeurIPS. pdf
    BibTeX
    @inproceedings{oac-pg,
        title     = {Better Exploration with Optimistic Actor-Critic},
        author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
        booktitle = {NeurIPS},
        pdf       = {https://arxiv.org/pdf/1910.12807},
        year      = {2019}
    }
    
  16. Fellows, M., Ciosek, K., & Whiteson, S. (2018). Fourier Policy Gradients. ICML. pdf
    BibTeX
    @inproceedings{fourier-pg,
        author        = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
        title         = {{F}ourier {P}olicy {G}radients},
        booktitle     = {ICML},
        year          = {2018},
        eprint        = {1802.06891},
        pdf           = {https://arxiv.org/pdf/1802.06891},
        date-added    = {2018-02-25 15:17:14 +0000},
        date-modified = {2018-06-04 12:39:30 +0000}
    }
    
  17. Ciosek, K., & Whiteson, S. (2018). Expected Policy Gradients. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{epg-aaai,
      author        = {Ciosek, Kamil and Whiteson, Shimon},
      title         = {{E}xpected {P}olicy {G}radients},
      booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
      year          = {2018},
      pdf           = {./dwnl/ciosek-whiteson-epg.pdf},
      date-added    = {2017-12-14 15:15:13 +0000},
      date-modified = {2017-12-14 16:18:43 +0000}
    }
    
  18. Paul, S., Chatzilygeroudis, K., Ciosek, K., Mouret, J.-B., Osborne, M. A., & Whiteson, S. (2018). Alternating Optimisation and Quadrature for Robust Control. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf
    BibTeX
    @inproceedings{aloq,
      author        = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
      title         = {{A}lternating {O}ptimisation and {Q}uadrature for {R}obust {C}ontrol},
      booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
      year          = {2018},
      pdf           = {./dwnl/aloq.pdf},
      date-added    = {2017-12-14 15:17:17 +0000},
      date-modified = {2017-12-14 16:10:14 +0000}
    }
    
  19. Ciosek, K., & Whiteson, S. (2017). OFFER: Off-Environment Reinforcement Learning. The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17). pdf
    BibTeX
    @inproceedings{offer,
      author        = {Ciosek, Kamil and Whiteson, Shimon},
      title         = {{OFFER}: {O}ff-{E}nvironment {R}einforcement {L}earning},
      booktitle     = {The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)},
      year          = {2017},
      pdf           = {./dwnl/offer.pdf},
      date-added    = {2017-12-14 15:17:29 +0000},
      date-modified = {2017-12-14 16:18:23 +0000}
    }
    
  20. Ciosek, K., & Whiteson, S. (2016). Off-Environment RL with Rare Events. NIPS Workshop on Optimizing the Optimizers. pdf
    BibTeX
    @inproceedings{off-env-wk,
      author        = {Ciosek, Kamil and Whiteson, Shimon},
      title         = {{O}ff-{E}nvironment {RL} with {R}are {E}vents},
      booktitle     = {NIPS workshop on Optimizing the Optimizers},
      year          = {2016},
      pdf           = {./dwnl/offer.pdf},
      date-added    = {2017-12-14 16:17:37 +0000},
      date-modified = {2017-12-14 16:18:08 +0000}
    }
    
  21. Ciosek, K., & Silver, D. (2015). Value Iteration with Options and State Aggregation. In Proceedings of the 5th Workshop on Planning and Learning, ICAPS. pdf
    BibTeX
    @inproceedings{opt-aggr,
      author        = {Ciosek, Kamil and Silver, David},
      title         = {{V}alue {I}teration with {O}ptions and {S}tate {A}ggregation},
      booktitle     = {Proceedings of the 5th Workshop on Planning and Learning, ICAPS},
      month         = jun,
      year          = {2015},
      pdf           = {./dwnl/opt-aggr.pdf},
      date-added    = {2017-12-14 15:17:49 +0000},
      date-modified = {2017-12-14 16:19:13 +0000}
    }
    
  22. Ciosek, K. (2015). Linear Reinforcement Learning with Options. [Ph. D. thesis]. University College London. pdf
    BibTeX
    @phdthesis{ciosek-thesis,
      author        = {Ciosek, Kamil},
      title         = {{L}inear {R}einforcement {L}earning with {O}ptions},
      school        = {University College London},
      type          = {{P}h.\ {D}.\ thesis},
      year          = {2015},
      pdf           = {./dwnl/phd-thesis.pdf},
      date-added    = {2017-12-14 15:20:33 +0000},
      date-modified = {2017-12-14 15:34:57 +0000}
    }
    
  23. Silver, D., & Ciosek, K. (2012). Compositional Planning Using Optimal Option Models. ICML. pdf
    BibTeX
    @inproceedings{silver-ciosek-options,
        author        = {Silver, David and Ciosek, Kamil},
        title         = {{C}ompositional {P}lanning {U}sing {O}ptimal {O}ption {M}odels},
        booktitle     = {ICML},
        year          = {2012},
        pdf           = {./dwnl/composition.pdf},
        date-added    = {2017-12-14 15:24:19 +0000},
        date-modified = {2017-12-14 16:19:37 +0000}
    }
    
  24. Ciosek, K., & Kotowski, P. (2009). Generating 3D Plants using Lindenmayer System. GRAPP, 76–81. pdf
    BibTeX
    @inproceedings{lindenmayer-plants,
      author        = {Ciosek, Kamil and Kotowski, Pawe{\l}},
      title         = {{G}enerating 3D {P}lants using {L}indenmayer {S}ystem},
      booktitle     = {GRAPP},
      pages         = {76--81},
      year          = {2009},
      pdf           = {./dwnl/lindenmayer.pdf},
      date-added    = {2017-12-14 15:24:28 +0000},
      date-modified = {2017-12-14 16:19:49 +0000}
    }