|
|
I am a machine learning (ML) generalist, with a focus on Reinforcement Learning, Bayesian Modelling and Large Language Models. I am also interested in the theory of deep learning, particularly the links to Bayesian inference from the angle of the Neural Tangent Kernel. While I maintain an ongoing interest in the fundamentals, I apply my research to recommendation problems.
@article{ciosek2025hallucinations,
  author  = {Ciosek, Kamil and Felicioni, Nicol{\`o} and Ghiassian, Sina},
  title   = {Hallucination Detection on a Budget: Efficient {Bayesian} Estimation of Semantic Entropy},
  journal = {Transactions on Machine Learning Research},
  year    = {2025},
  url     = {https://openreview.net/forum?id=j2N2RuNdbC}
}
@article{felicioni2024on,
  author  = {Felicioni, Nicol{\`o} and Maystre, Lucas and Ghiassian, Sina and Ciosek, Kamil},
  title   = {On the Importance of Uncertainty in Decision-Making with Large Language Models},
  journal = {Transactions on Machine Learning Research},
  year    = {2024},
  url     = {https://openreview.net/forum?id=YfPzUX6DdO}
}
@inproceedings{tslt,
  author    = {McDonald, Thomas and Maystre, Lucas and Lalmas, Mounia and Russo, Daniel and Ciosek, Kamil},
  title     = {Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay},
  booktitle = {KDD},
  year      = {2023}
}
@inproceedings{mbrlp,
  author    = {Tomasi, Federico and Cauteruccio, Joe and Kanoria, Surya and Ciosek, Kamil and Rinaldi, Matteo and Dai, Zhenwen},
  title     = {Automatic Music Playlist Generation via Simulation-based Reinforcement Learning},
  booktitle = {KDD},
  year      = {2023}
}
@inproceedings{ilr,
  author    = {Ciosek, Kamil},
  title     = {Imitation Learning by Reinforcement Learning},
  booktitle = {ICLR},
  year      = {2022},
  pdf       = {https://openreview.net/pdf?id=1zwleytEpYx}
}
@inproceedings{idRL,
  author    = {Lindner, David and Turchetta, Matteo and Tschiatschek, Sebastian and Ciosek, Kamil and Krause, Andreas},
  title     = {Information Directed Reward Learning for Reinforcement Learning},
  booktitle = {NeurIPS},
  year      = {2021},
  pdf       = {https://openreview.net/pdf?id=t5-Mszu1UkO}
}
@inproceedings{huseinPolicies,
  author    = {Husain, Hisham and Ciosek, Kamil and Tomioka, Ryota},
  title     = {Regularized Policies are Reward Robust},
  booktitle = {AISTATS},
  year      = {2021},
  pdf       = {http://proceedings.mlr.press/v130/husain21a/husain21a.pdf}
}
@inproceedings{bayesianRLMeta,
  author    = {Zintgraf, Luisa and Devlin, Sam and Ciosek, Kamil and Whiteson, Shimon and Hofmann, Katja},
  title     = {Deep Interactive Bayesian Reinforcement Learning via Meta-Learning},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1712.pdf}
}
@inproceedings{knottRobustness,
  author    = {Knott, Paul and Carroll, Micah and Devlin, Sam and Ciosek, Kamil and Hofmann, Katja and Dragan, Anca and Shah, Rohin},
  title     = {Evaluating the Robustness of Collaborative Agents},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1560.pdf}
}
@inproceedings{rashid2021alpha,
  author    = {Rashid, Tabish and Zhang, Cheng and Ciosek, Kamil},
  title     = {Estimating {alpha-Rank} by Maximizing Information Gain},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2021},
  pdf       = {https://arxiv.org/abs/2101.09178}
}
@inproceedings{multitask-batch-rl,
  author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
  title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
  booktitle = {NeurIPS},
  year      = {2020},
  pdf       = {https://arxiv.org/pdf/1909.11373.pdf}
}
@inproceedings{beck2020amrl,
  author    = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
  title     = {{AMRL}: {A}ggregated {M}emory for {R}einforcement {L}earning},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  pdf       = {https://openreview.net/pdf?id=Bkl7bREtDr}
}
@inproceedings{ciosek2020conservative,
  author    = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
  title     = {{C}onservative {U}ncertainty {E}stimation By {F}itting {P}rior {N}etworks},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  pdf       = {https://openreview.net/pdf?id=BJlahxHYDS}
}
@inproceedings{amit2020discount,
  author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
  title     = {Discount Factor as a Regularizer in Reinforcement Learning},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2020},
  pages     = {2760--2769}
}
@article{epg-journal,
  author  = {Ciosek, Kamil and Whiteson, Shimon},
  title   = {Expected Policy Gradients for Reinforcement Learning},
  journal = {Journal of Machine Learning Research},
  year    = {2020},
  volume  = {21},
  number  = {52},
  pages   = {1--51},
  pdf     = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
  note    = {Submitted in January 2018}
}
@inproceedings{sni-rl,
  author    = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
  title     = {{G}eneralization in {R}einforcement {L}earning with {S}elective {N}oise {I}njection and {I}nformation {B}ottleneck},
  booktitle = {NeurIPS},
  year      = {2019},
  pdf       = {https://arxiv.org/pdf/1910.12911}
}
@inproceedings{oac-pg,
  author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
  title     = {Better Exploration with Optimistic Actor-Critic},
  booktitle = {NeurIPS},
  year      = {2019},
  pdf       = {https://arxiv.org/pdf/1910.12807}
}
@inproceedings{fourier-pg,
  author        = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
  title         = {{F}ourier {P}olicy {G}radients},
  booktitle     = {ICML},
  year          = {2018},
  eprint        = {1802.06891},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/pdf/1802.06891},
  date-added    = {2018-02-25 15:17:14 +0000},
  date-modified = {2018-06-04 12:39:30 +0000}
}
@inproceedings{epg-aaai,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{E}xpected {P}olicy {G}radients},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/ciosek-whiteson-epg.pdf},
  date-added    = {2017-12-14 15:15:13 +0000},
  date-modified = {2017-12-14 16:18:43 +0000}
}
@inproceedings{aloq,
  author        = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
  title         = {{A}lternating {O}ptimisation and {Q}uadrature for {R}obust {C}ontrol},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/aloq.pdf},
  date-added    = {2017-12-14 15:17:17 +0000},
  date-modified = {2017-12-14 16:10:14 +0000}
}
@inproceedings{offer,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{OFFER}: {O}ff-{E}nvironment {R}einforcement {L}earning},
  booktitle     = {The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)},
  year          = {2017},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 15:17:29 +0000},
  date-modified = {2017-12-14 16:18:23 +0000}
}
@inproceedings{off-env-wk,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{O}ff-{E}nvironment {RL} with {R}are {E}vents},
  booktitle     = {NIPS workshop on Optimizing the Optimizers},
  year          = {2016},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 16:17:37 +0000},
  date-modified = {2017-12-14 16:18:08 +0000}
}
@inproceedings{opt-aggr,
  author        = {Ciosek, Kamil and Silver, David},
  title         = {{V}alue {I}teration with {O}ptions and {S}tate {A}ggregation},
  booktitle     = {Proceedings of the 5th Workshop on Planning and Learning, {ICAPS}},
  year          = {2015},
  month         = jun,
  pdf           = {./dwnl/opt-aggr.pdf},
  date-added    = {2017-12-14 15:17:49 +0000},
  date-modified = {2017-12-14 16:19:13 +0000}
}
@phdthesis{ciosek-thesis,
  author        = {Ciosek, Kamil},
  title         = {{L}inear {R}einforcement {L}earning with {O}ptions},
  school        = {University College London},
  type          = {{P}h.\ {D}.\ thesis},
  year          = {2015},
  pdf           = {./dwnl/phd-thesis.pdf},
  date-added    = {2017-12-14 15:20:33 +0000},
  date-modified = {2017-12-14 15:34:57 +0000}
}
@inproceedings{silver-ciosek-options,
  author        = {Silver, David and Ciosek, Kamil},
  title         = {{C}ompositional {P}lanning {U}sing {O}ptimal {O}ption {M}odels},
  booktitle     = {ICML},
  year          = {2012},
  pdf           = {./dwnl/composition.pdf},
  date-added    = {2017-12-14 15:24:19 +0000},
  date-modified = {2017-12-14 16:19:37 +0000}
}
@inproceedings{lindenmayer-plants,
  author        = {Ciosek, Kamil and Kotowski, Pawe{\l}},
  title         = {{G}enerating {3D} {P}lants using {L}indenmayer {S}ystem},
  booktitle     = {GRAPP},
  year          = {2009},
  pages         = {76--81},
  pdf           = {./dwnl/lindenmayer.pdf},
  date-added    = {2017-12-14 15:24:28 +0000},
  date-modified = {2017-12-14 16:19:49 +0000}
}