% Six bibliography entries, kept in their original order and under their
% original citation keys. Layout: one field per line, author names in the
% "Last, First" form. Text outside an entry is ignored by BibTeX, so these
% percent-prefixed lines act as comments.

% Conference paper (AISTATS 2012, JMLR Proceedings volume 22).
@inproceedings{AgarwalDKLS12,
  author    = {Agarwal, Alekh and Dud{\'{\i}}k, Miroslav and Kale, Satyen and Langford, John and Schapire, Robert E.},
  title     = {Contextual Bandit Learning with Predictable Rewards},
  booktitle = {{AISTATS}},
  series    = {{JMLR} Proceedings},
  volume    = {22},
  pages     = {19--26},
  publisher = {JMLR.org},
  year      = {2012}
}

% Conference paper (COLT 2017, PMLR volume 65).
@inproceedings{AgarwalLNS17,
  author    = {Agarwal, Alekh and Luo, Haipeng and Neyshabur, Behnam and Schapire, Robert E.},
  title     = {Corralling a Band of Bandit Algorithms},
  booktitle = {{COLT}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {65},
  pages     = {12--38},
  publisher = {{PMLR}},
  year      = {2017}
}

% arXiv preprint. The journal/volume pair is kept so classic styles render
% as before; the eprint id repeats the number found in the volume field.
@article{SinglaH017,
  author     = {Singla, Adish and Hassani, Seyed Hamed and Krause, Andreas},
  title      = {Learning to Use Learners' Advice},
  journal    = {CoRR},
  volume     = {abs/1702.04825},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1702.04825}
}

% Conference paper (NeurIPS 2020). The acronym is brace-protected to match
% the other booktitle fields in this file.
@inproceedings{PacchianoPA0ZLS20,
  author    = {Pacchiano, Aldo and Phan, My and Abbasi{-}Yadkori, Yasin and Rao, Anup and Zimmert, Julian and Lattimore, Tor and Szepesv{\'{a}}ri, Csaba},
  title     = {Model Selection in Contextual Stochastic Bandit Problems},
  booktitle = {{NeurIPS}},
  year      = {2020}
}

% arXiv preprint; eprint id repeats the number found in the volume field.
@article{AbbasiYadkori2020regrebalancing,
  author     = {Abbasi{-}Yadkori, Yasin and Pacchiano, Aldo and Phan, My},
  title      = {Regret Balancing for Bandit and {RL} Model Selection},
  journal    = {CoRR},
  volume     = {abs/2006.05491},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2006.05491}
}

% arXiv preprint; eprint id repeats the number found in the volume field.
@article{pacchiano2020regret,
  author     = {Pacchiano, Aldo and Dann, Christoph and Gentile, Claudio and Bartlett, Peter L.},
  title      = {Regret Bound Balancing and Elimination for Model Selection in Bandits and {RL}},
  journal    = {CoRR},
  volume     = {abs/2012.13045},
  year       = {2020},
  eprinttype = {arXiv},
  eprint     = {2012.13045}
}