|
|
I am a machine learning (ML) researcher with a focus on reinforcement learning (RL). The kind of practical problem that I find most interesting is making decisions on the basis of finite data. Bandits, MDPs and their generalizations (POMDPs, games) are my main modeling tools and I am interested in solving them in novel and useful ways. In particular, I believe that finding the right ways to quantify uncertainty in complex deep RL models is one of the most promising approaches to improving the sample efficiency of synthetic controllers. I am also interested in imitation learning.
% McDonald et al., KDD 2023 (conference paper).
@inproceedings{tslt,
  author    = {McDonald, Thomas and Maystre, Lucas and Lalmas, Mounia and Russo, Daniel and Ciosek, Kamil},
  title     = {Impatient Bandits: Optimizing Recommendations for the Long-Term Without Delay},
  booktitle = {KDD},
  year      = {2023},
}
% Tomasi et al., KDD 2023 (conference paper).
@inproceedings{mbrlp,
  author    = {Tomasi, Federico and Cauteruccio, Joe and Kanoria, Surya and Ciosek, Kamil and Rinaldi, Matteo and Dai, Zhenwen},
  title     = {Automatic Music Playlist Generation via Simulation-based Reinforcement Learning},
  booktitle = {KDD},
  year      = {2023},
}
% Ciosek, ICLR 2022 (conference paper).
@inproceedings{ilr,
  author    = {Ciosek, Kamil},
  title     = {Imitation Learning by Reinforcement Learning},
  booktitle = {ICLR},
  year      = {2022},
  pdf       = {https://openreview.net/pdf?id=1zwleytEpYx},
}
% Lindner et al., NeurIPS 2021 (conference paper).
@inproceedings{idRL,
  author    = {Lindner, David and Turchetta, Matteo and Tschiatschek, Sebastian and Ciosek, Kamil and Krause, Andreas},
  title     = {Information Directed Reward Learning for Reinforcement Learning},
  booktitle = {NeurIPS},
  year      = {2021},
  pdf       = {https://openreview.net/pdf?id=t5-Mszu1UkO},
}
% Husain et al., AISTATS 2021 (conference paper).
@inproceedings{huseinPolicies,
  author    = {Husain, Hisham and Ciosek, Kamil and Tomioka, Ryota},
  title     = {Regularized Policies are Reward Robust},
  booktitle = {AISTATS},
  year      = {2021},
  pdf       = {http://proceedings.mlr.press/v130/husain21a/husain21a.pdf},
}
% Zintgraf et al., AAMAS 2021 (conference paper).
% "Bayesian" is brace-protected so sentence-casing styles keep its capital.
@inproceedings{bayesianRLMeta,
  author    = {Zintgraf, Luisa and Devlin, Sam and Ciosek, Kamil and Whiteson, Shimon and Hofmann, Katja},
  title     = {Deep Interactive {Bayesian} Reinforcement Learning via Meta-Learning},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1712.pdf},
}
% Knott et al., AAMAS 2021 (conference paper).
@inproceedings{knottRobustness,
  author    = {Knott, Paul and Carroll, Micah and Devlin, Sam and Ciosek, Kamil and Hofmann, Katja and Dragan, Anca and Shah, Rohin},
  title     = {Evaluating the Robustness of Collaborative Agents},
  booktitle = {AAMAS},
  year      = {2021},
  pdf       = {https://www.ifaamas.org/Proceedings/aamas2021/pdfs/p1560.pdf},
}
% Rashid et al., AAAI 2021 (conference paper).
@inproceedings{rashid2021alpha,
  author    = {Rashid, Tabish and Zhang, Cheng and Ciosek, Kamil},
  title     = {Estimating alpha-Rank by Maximizing Information Gain},
  booktitle = {AAAI Conference on Artificial Intelligence},
  year      = {2021},
  pdf       = {https://arxiv.org/abs/2101.09178},
}
% Li et al., NeurIPS 2020 (conference paper).
@inproceedings{multitask-batch-rl,
  author    = {Li, Jiachen and Vuong, Quan and Liu, Shuang and Liu, Minghua and Ciosek, Kamil and Christensen, Henrik and Su, Hao},
  title     = {Multi-task Batch Reinforcement Learning with Metric Learning},
  booktitle = {NeurIPS},
  year      = {2020},
  pdf       = {https://arxiv.org/pdf/1909.11373.pdf},
}
% Beck et al., ICLR 2020 (conference paper).
% Capitalised title words are protected as whole words rather than by their first letter.
@inproceedings{beck2020amrl,
  author    = {Beck, Jacob and Ciosek, Kamil and Devlin, Sam and Tschiatschek, Sebastian and Zhang, Cheng and Hofmann, Katja},
  title     = {{AMRL}: {Aggregated} {Memory} for {Reinforcement} {Learning}},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  pdf       = {https://openreview.net/pdf?id=Bkl7bREtDr},
}
% Ciosek et al., ICLR 2020 (conference paper).
% Capitalised title words are protected as whole words rather than by their first letter.
@inproceedings{ciosek2020conservative,
  author    = {Ciosek, Kamil and Fortuin, Vincent and Tomioka, Ryota and Hofmann, Katja and Turner, Richard},
  title     = {{Conservative} {Uncertainty} {Estimation} By {Fitting} {Prior} {Networks}},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  pdf       = {https://openreview.net/pdf?id=BJlahxHYDS},
}
% Amit et al., ICML 2020 (conference paper).
@inproceedings{amit2020discount,
  author    = {Amit, Ron and Meir, Ron and Ciosek, Kamil},
  title     = {Discount Factor as a Regularizer in Reinforcement Learning},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2020},
  pages     = {2760--2769},
}
% Ciosek and Whiteson, JMLR 21(52), 2020 (journal article).
@article{epg-journal,
  author  = {Ciosek, Kamil and Whiteson, Shimon},
  title   = {Expected Policy Gradients for Reinforcement Learning},
  journal = {Journal of Machine Learning Research},
  year    = {2020},
  volume  = {21},
  number  = {52},
  pages   = {1--51},
  pdf     = {http://www.jmlr.org/papers/volume21/18-012/18-012.pdf},
  note    = {Submitted in January 2018},
}
% Igl et al., NeurIPS 2019 (conference paper).
% Capitalised title words are protected as whole words rather than by their first letter.
@inproceedings{sni-rl,
  author    = {Igl, Maximilian and Ciosek, Kamil and Li, Yingzhen and Tschiatschek, Sebastian and Zhang, Cheng and Devlin, Sam and Hofmann, Katja},
  title     = {{Generalization} in {Reinforcement} {Learning} with {Selective} {Noise} {Injection} and {Information} {Bottleneck}},
  booktitle = {NeurIPS},
  year      = {2019},
  pdf       = {https://arxiv.org/pdf/1910.12911},
}
% Ciosek et al., NeurIPS 2019 (conference paper).
@inproceedings{oac-pg,
  author    = {Ciosek, Kamil and Vuong, Quan and Loftin, Robert and Hofmann, Katja},
  title     = {Better Exploration with Optimistic Actor-Critic},
  booktitle = {NeurIPS},
  year      = {2019},
  pdf       = {https://arxiv.org/pdf/1910.12807},
}
% Fellows et al., ICML 2018 (conference paper).
% archiveprefix names the archive for the eprint id; the pdf link points at arxiv.org.
@inproceedings{fourier-pg,
  author        = {Fellows, Matthew and Ciosek, Kamil and Whiteson, Shimon},
  title         = {{Fourier} {Policy} {Gradients}},
  booktitle     = {ICML},
  year          = {2018},
  eprint        = {1802.06891},
  archiveprefix = {arXiv},
  pdf           = {https://arxiv.org/pdf/1802.06891},
  date-added    = {2018-02-25 15:17:14 +0000},
  date-modified = {2018-06-04 12:39:30 +0000},
}
% Ciosek and Whiteson, AAAI-18.
% A conference paper, so it is typed @inproceedings with the venue in booktitle.
@inproceedings{epg-aaai,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{Expected} {Policy} {Gradients}},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/ciosek-whiteson-epg.pdf},
  date-added    = {2017-12-14 15:15:13 +0000},
  date-modified = {2017-12-14 16:18:43 +0000},
}
% Paul et al., AAAI-18.
% A conference paper, so it is typed @inproceedings with the venue in booktitle.
@inproceedings{aloq,
  author        = {Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael A. and Whiteson, Shimon},
  title         = {{Alternating} {Optimisation} and {Quadrature} for {Robust} {Control}},
  booktitle     = {The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18)},
  year          = {2018},
  pdf           = {./dwnl/aloq.pdf},
  date-added    = {2017-12-14 15:17:17 +0000},
  date-modified = {2017-12-14 16:10:14 +0000},
}
% Ciosek and Whiteson, AAAI-17.
% A conference paper, so it is typed @inproceedings with the venue in booktitle.
@inproceedings{offer,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{OFFER}: {Off-Environment} {Reinforcement} {Learning}},
  booktitle     = {The Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17)},
  year          = {2017},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 15:17:29 +0000},
  date-modified = {2017-12-14 16:18:23 +0000},
}
% Ciosek and Whiteson, NIPS workshop paper, 2016.
% A workshop contribution, so it is typed @inproceedings with the venue in booktitle.
@inproceedings{off-env-wk,
  author        = {Ciosek, Kamil and Whiteson, Shimon},
  title         = {{Off-Environment} {RL} with {Rare} {Events}},
  booktitle     = {NIPS workshop on Optimizing the Optimizers},
  year          = {2016},
  pdf           = {./dwnl/offer.pdf},
  date-added    = {2017-12-14 16:17:37 +0000},
  date-modified = {2017-12-14 16:18:08 +0000},
}
% Ciosek and Silver, ICAPS workshop paper, June 2015.
% Typed @inproceedings; the bibliography style supplies the leading "In" and the
% final punctuation, so neither is stored in the field values.
@inproceedings{opt-aggr,
  author        = {Ciosek, Kamil and Silver, David},
  title         = {{Value} {Iteration} with {Options} and {State} {Aggregation}},
  booktitle     = {Proceedings of the 5th Workshop on Planning and Learning, {ICAPS}},
  year          = {2015},
  month         = jun,
  pdf           = {./dwnl/opt-aggr.pdf},
  date-added    = {2017-12-14 15:17:49 +0000},
  date-modified = {2017-12-14 16:19:13 +0000},
}
% Ciosek, PhD thesis, University College London, 2015.
% Trailing punctuation is left to the bibliography style.
@phdthesis{ciosek-thesis,
  author        = {Ciosek, Kamil},
  title         = {{Linear} {Reinforcement} {Learning} with {Options}},
  school        = {University College London},
  type          = {{Ph.\ D.}\ thesis},
  year          = {2015},
  pdf           = {./dwnl/phd-thesis.pdf},
  date-added    = {2017-12-14 15:20:33 +0000},
  date-modified = {2017-12-14 15:34:57 +0000},
}
% Silver and Ciosek, ICML 2012 (conference paper).
% Capitalised title words are protected as whole words rather than by their first letter.
@inproceedings{silver-ciosek-options,
  author        = {Silver, David and Ciosek, Kamil},
  title         = {{Compositional} {Planning} {Using} {Optimal} {Option} {Models}},
  booktitle     = {ICML},
  year          = {2012},
  pdf           = {./dwnl/composition.pdf},
  date-added    = {2017-12-14 15:24:19 +0000},
  date-modified = {2017-12-14 16:19:37 +0000},
}
% Ciosek and Kotowski, GRAPP 2009 (conference paper).
% "3D" is brace-protected so sentence-casing styles do not turn it into "3d".
@inproceedings{lindenmayer-plants,
  author        = {Ciosek, Kamil and Kotowski, Pawe{\l}},
  title         = {{Generating} {3D} {Plants} using {Lindenmayer} {System}},
  booktitle     = {GRAPP},
  year          = {2009},
  pages         = {76--81},
  pdf           = {./dwnl/lindenmayer.pdf},
  date-added    = {2017-12-14 15:24:28 +0000},
  date-modified = {2017-12-14 16:19:49 +0000},
}