Kamil Ciosek

About Me

I am a machine learning (ML) generalist, with a focus on Reinforcement Learning, Bayesian Modelling and Large Language Models. I am also interested in the theory of deep learning, particularly the links to Bayesian inference from the angle of the Neural Tangent Kernel. While I maintain an ongoing interest in the fundamentals, I apply my research to recommendation problems.

Preprints

Linear Gradient Prediction with Control Variates by Kamil Ciosek, Nicolò Felicioni, Juan Elenter Litwin
Observation Noise and Initialization in Wide Neural Networks by Sergio Calvo-Ordoñez, Jonathan Plenk, Richard Bergna, Alvaro Cartea, Jose Miguel Hernandez-Lobato, Konstantina Palla, Kamil Ciosek
Impatient Bandits: Optimizing for the Long-Term Without Delay (journal version) by Kelly W. Zhang, Thomas Baldwin-McDonald, Kamil Ciosek, Lucas Maystre, Daniel Russo

Publications

Ciosek, K., Felicioni, N., Ghiassian, S. Hallucination Detection on a Budget: Efficient Bayesian Estimation of Semantic Entropy TMLR, 2025. pdf

BibTeX

@article{ciosek2025hallucinations,
  author  = {Ciosek, Kamil and Felicioni, Nicol{\`o} and Ghiassian, Sina},
  title   = {Hallucination Detection on a Budget: Efficient {Bayesian} Estimation of Semantic Entropy},
  journal = {Transactions on Machine Learning Research},
  year    = {2025},
  url     = {https://openreview.net/forum?id=j2N2RuNdbC}
}

Felicioni, N., Maystre, L., Ghiassian, S., Ciosek, K. On the Importance of Uncertainty in Decision-Making with Large Language Models TMLR, 2024. pdf

BibTeX

@article{felicioni2024on,
  author  = {Felicioni, Nicol{\`o} and Maystre, Lucas and Ghiassian, Sina and Ciosek, Kamil},
  title   = {On the Importance of Uncertainty in Decision-Making with Large Language Models},
  journal = {Transactions on Machine Learning Research},
  year    = {2024},
  url     = {https://openreview.net/forum?id=YfPzUX6DdO}
}

McDonald, T., Maystre, L., Lalmas, M., Russo, D., Ciosek, K. Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay KDD 2023. pdf

BibTeX

@inproceedings{tslt,
  author    = {McDonald, Thomas and Maystre, Lucas and Lalmas, Mounia and Russo, Daniel and Ciosek, Kamil},
  title     = {Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay},
  booktitle = {KDD},
  year      = {2023}
}

Tomasi, F., Cauteruccio, J., Kanoria, S., Ciosek, K., Rinaldi, M., Dai, Z. Automatic Music Playlist Generation via Simulation-based Reinforcement Learning KDD 2023. pdf

BibTeX

@inproceedings{mbrlp,
  author    = {Tomasi, Federico and Cauteruccio, Joe and Kanoria, Surya and Ciosek, Kamil and Rinaldi, Matteo and Dai, Zhenwen},
  title     = { Automatic Music Playlist Generation via Simulation-based Reinforcement Learning },
  booktitle = {KDD},
  year      = {2023}
}

Ciosek, K. Imitation Learning by Reinforcement Learning ICLR 2022. pdf code

BibTeX

@inproceedings{ilr,
  author    = {Ciosek, Kamil},
  title     = { Imitation Learning by Reinforcement Learning },
  booktitle = {ICLR},
  year      = {2022},
  pdf       = { https://openreview.net/pdf?id=1zwleytEpYx }
}

Lindner, D., Turchetta, M., Tschiatschek, S., Ciosek, K., Krause, A. (2021) Information Directed Reward Learning for Reinforcement Learning NeurIPS 2021. pdf

BibTeX

@inproceedings{idRL,
    author    = {Lindner, David and Turchetta, Matteo and Tschiatschek, Sebastian and Ciosek, Kamil and Krause, Andreas},
    title     = { Information Directed Reward Learning for Reinforcement Learning },
    booktitle = {NeurIPS},
    year      = {2021},
    pdf       = { https://openreview.net/pdf?id=t5-Mszu1UkO }
}

Husain, H., Ciosek, K., Tomioka, R. (2021) Regularized Policies are Reward Robust AISTATS 2021. pdf

BibTeX

@inproceedings{huseinPolicies,
  author    = {Husain, Hisham and Ciosek, Kamil and Tomioka, Ryota},
  title     = { Regularized Policies are Reward Robust },
  booktitle = {AISTATS},
  year      = {2021},
  pdf       = { http://proceedings.mlr.press/v130/husain21a/husain21a.pdf }
}

Zintgraf, L., Devlin, S., Ciosek, K., Whiteson, S., Hofmann, K. (2021) Deep Interactive Bayesian Reinforcement Learning via Meta-Learning AAMAS 2021. pdf

BibTeX

@inproceedings{bayesianRLMeta,
  author    = {Zintgraf, Luisa and Devlin, Sam and Ciosek, Kamil and Whiteson, Shimon and Hofmann, Katja},
  title     = {Deep Interactive {Bayesian} Reinforcement Learning via Meta-Learning},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1712.pdf}
}

Knott, P., Carroll, M., Devlin, S., Ciosek, K., Hofmann, K., Dragan, A., Shah, R. (2021) Evaluating the Robustness of Collaborative Agents. AAMAS 2021. pdf

BibTeX

@inproceedings{knottRobustness,
  author    = {Knott, Paul and Carroll, Micah and Devlin, Sam and Ciosek, Kamil and Hofmann, Katja and Dragan, Anca and Shah, Rohin},
  title     = {Evaluating the Robustness of Collaborative Agents},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1560.pdf}
}

Rashid, T., Zhang, C., Ciosek, K. (2021). Estimating α-Rank by Maximizing Information Gain AAAI 2021. pdf

BibTeX

@inproceedings{rashid2021alpha,
  author    = {Rashid, Tabish and Zhang, Cheng and Ciosek, Kamil},
  title     = {Estimating {$\alpha$-Rank} by Maximizing Information Gain},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2021},
  pdf       = {https://arxiv.org/abs/2101.09178}
}

Li, J., Vuong, Q., Liu, S., Liu, M., Ciosek, K., Christensen, H., & Su, H. (2020). Multi-task Batch Reinforcement Learning with Metric Learning. NeurIPS. pdf

BibTeX

@inproceedings{multitask-batch-rl,
    title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
    author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
    booktitle = {NeurIPS},
    year      = {2020},
    pdf       = {https://arxiv.org/pdf/1909.11373.pdf}
}

Beck, J., Ciosek, K., Devlin, S., Tschiatschek, S., Zhang, C., & Hofmann, K. (2020). AMRL: Aggregated Memory for Reinforcement Learning. International Conference on Learning Representations. pdf

BibTeX

@inproceedings{beck2020amrl,
    author    = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
    title     = {{AMRL}: {A}ggregated {M}emory for {R}einforcement {L}earning},
    booktitle = {International Conference on Learning Representations},
    year      = {2020},
    pdf       = {https://openreview.net/pdf?id=Bkl7bREtDr}
}

Ciosek, K., Fortuin, V., Tomioka, R., Hofmann, K., & Turner, R. (2020). Conservative Uncertainty Estimation By Fitting Prior Networks. International Conference on Learning Representations. pdf

BibTeX

@inproceedings{ciosek2020conservative,
    author    = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
    title     = {{C}onservative {U}ncertainty {E}stimation By {F}itting  {P}rior {N}etworks},
    booktitle = {International Conference on Learning Representations},
    year      = {2020},
    pdf       = {https://openreview.net/pdf?id=BJlahxHYDS}
}

R. Amit, R. Meir, K. Ciosek, “Discount Factor as a Regularizer in Reinforcement Learning” ICML 2020. pdf

BibTeX

@inproceedings{amit2020discount,
    author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
    title     = {Discount Factor as a Regularizer in Reinforcement Learning},
    booktitle = {International Conference on Machine Learning (ICML)},
    year      = {2020},
    pages     = {2760--2769}
}

Ciosek, K., & Whiteson, S. (2020). Expected Policy Gradients for Reinforcement Learning. Journal of Machine Learning Research, 21(52), 1–51. Submitted in January 2018 pdf

BibTeX

@article{epg-journal,
  author  = {Ciosek, Kamil and Whiteson, Shimon},
  title   = {Expected Policy Gradients for Reinforcement Learning},
  journal = {Journal of Machine Learning Research},
  year    = {2020},
  volume  = {21},
  number  = {52},
  pages   = {1--51},
  pdf     = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
  note    = {Submitted in January 2018}
}

Igl, M., Ciosek, K., Li, Y., Tschiatschek, S., Zhang, C., Devlin, S., & Hofmann, K. (2019). Generalization in Reinforcement Learning with Selective Noise Injection and Information Bottleneck. NeurIPS. pdf

BibTeX

@inproceedings{sni-rl,
    title     = {{G}eneralization in {R}einforcement {L}earning with {S}elective {N}oise {I}njection
                 and {I}nformation {B}ottleneck},
    author    = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
    booktitle = {NeurIPS},
    year      = {2019},
    pdf       = {https://arxiv.org/pdf/1910.12911}
}

Ciosek, K., Vuong, Q., Loftin, R., & Hofmann, K. (2019). Better Exploration with Optimistic Actor-Critic. NeurIPS. pdf

BibTeX

@inproceedings{oac-pg,
    title     = {Better Exploration with Optimistic Actor-Critic},
    author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
    booktitle = {NeurIPS},
    year      = {2019},
    pdf       = {https://arxiv.org/pdf/1910.12807}
}

Fellows, M., Ciosek, K., & Whiteson, S. (2018). Fourier Policy Gradients. ICML. pdf

BibTeX

@inproceedings{fourier-pg,
  author        = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
  title         = {{F}ourier {P}olicy {G}radients},
  booktitle     = {ICML},
  year          = {2018},
  eprint        = {1802.06891},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/pdf/1802.06891},
  date-added    = {2018-02-25 15:17:14 +0000},
  date-modified = {2018-06-04 12:39:30 +0000}
}

Ciosek, K., & Whiteson, S. (2018). Expected Policy Gradients. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf

BibTeX

@inproceedings{epg-aaai,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{E}xpected {P}olicy {G}radients},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/ciosek-whiteson-epg.pdf},
  date-added    = {2017-12-14 15:15:13 +0000},
  date-modified = {2017-12-14 16:18:43 +0000}
}

Paul, S., Chatzilygeroudis, K., Ciosek, K., Mouret, J.-B., Osborne, M. A., & Whiteson, S. (2018). Alternating Optimisation and Quadrature for Robust Control. The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18). pdf

BibTeX

@inproceedings{aloq,
  author        = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
  title         = {{A}lternating {O}ptimisation and {Q}uadrature for {R}obust {C}ontrol},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/aloq.pdf},
  date-added    = {2017-12-14 15:17:17 +0000},
  date-modified = {2017-12-14 16:10:14 +0000}
}

Ciosek, K., & Whiteson, S. (2017). OFFER: Off-Environment Reinforcement Learning. The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17). pdf

BibTeX

@inproceedings{offer,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{OFFER}: {O}ff-{E}nvironment {R}einforcement {L}earning},
  booktitle     = {The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)},
  year          = {2017},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 15:17:29 +0000},
  date-modified = {2017-12-14 16:18:23 +0000}
}

Ciosek, K., & Whiteson, S. (2016). Off-Environment RL with Rare Events. NIPS Workshop on Optimizing the Optimizers. pdf

BibTeX

@inproceedings{off-env-wk,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{O}ff-{E}nvironment {RL} with {R}are {E}vents},
  booktitle     = {NIPS workshop on Optimizing the Optimizers},
  year          = {2016},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 16:17:37 +0000},
  date-modified = {2017-12-14 16:18:08 +0000}
}

Ciosek, K., & Silver, D. (2015). Value Iteration with Options and State Aggregation. In Proceedings of the 5th Workshop on Planning and Learning, ICAPS. pdf

BibTeX

@inproceedings{opt-aggr,
  author        = {Ciosek, Kamil and Silver, David},
  title         = {{V}alue {I}teration with {O}ptions and {S}tate {A}ggregation},
  booktitle     = {Proceedings of the 5th Workshop on Planning and Learning, {ICAPS}},
  year          = {2015},
  month         = jun,
  pdf           = {./dwnl/opt-aggr.pdf},
  date-added    = {2017-12-14 15:17:49 +0000},
  date-modified = {2017-12-14 16:19:13 +0000}
}

Ciosek, K. (2015). Linear Reinforcement Learning with Options. [Ph. D. thesis]. University College London. pdf

BibTeX

@phdthesis{ciosek-thesis,
  author        = {Ciosek, Kamil},
  title         = {{L}inear {R}einforcement {L}earning with {O}ptions},
  school        = {University College London},
  type          = {{P}h.\ {D}.\ thesis},
  year          = {2015},
  pdf           = {./dwnl/phd-thesis.pdf},
  date-added    = {2017-12-14 15:20:33 +0000},
  date-modified = {2017-12-14 15:34:57 +0000}
}

Silver, D., & Ciosek, K. (2012). Compositional Planning Using Optimal Option Models. ICML. pdf

BibTeX

@inproceedings{silver-ciosek-options,
    title         = {{C}ompositional {P}lanning {U}sing {O}ptimal {O}ption {M}odels},
    author        = {Silver, David and Ciosek, Kamil},
    booktitle     = {ICML},
    year          = {2012},
    pdf           = {./dwnl/composition.pdf},
    date-added    = {2017-12-14 15:24:19 +0000},
    date-modified = {2017-12-14 16:19:37 +0000}
}

Ciosek, K., & Kotowski, P. (2009). Generating 3D Plants using Lindenmayer System. GRAPP, 76–81. pdf

BibTeX

@inproceedings{lindenmayer-plants,
  author        = {Ciosek, Kamil and Kotowski, Pawe{\l}},
  title         = {{G}enerating {3D} {P}lants using {L}indenmayer {S}ystem},
  booktitle     = {GRAPP},
  year          = {2009},
  pages         = {76--81},
  pdf           = {./dwnl/lindenmayer.pdf},
  date-added    = {2017-12-14 15:24:28 +0000},
  date-modified = {2017-12-14 16:19:49 +0000}
}