reinforcement_learning/hw4_model_selection_bandits/bibliography.bib · MVA-2021

% AISTATS 2012 paper on contextual bandit learning with predictable rewards
% (JMLR Proceedings, volume 22).
@inproceedings{AgarwalDKLS12,
  author    = {Agarwal, Alekh and
               Dud{\'{\i}}k, Miroslav and
               Kale, Satyen and
               Langford, John and
               Schapire, Robert E.},
  title     = {Contextual Bandit Learning with Predictable Rewards},
  booktitle = {{AISTATS}},
  series    = {{JMLR} Proceedings},
  volume    = {22},
  pages     = {19--26},
  publisher = {JMLR.org},
  year      = {2012},
}

% COLT 2017 paper on corralling a band of bandit algorithms
% (Proceedings of Machine Learning Research, volume 65).
@inproceedings{AgarwalLNS17,
  author    = {Agarwal, Alekh and
               Luo, Haipeng and
               Neyshabur, Behnam and
               Schapire, Robert E.},
  title     = {Corralling a Band of Bandit Algorithms},
  booktitle = {{COLT}},
  series    = {Proceedings of Machine Learning Research},
  volume    = {65},
  pages     = {12--38},
  publisher = {{PMLR}},
  year      = {2017},
}
% arXiv preprint (2017), indexed by DBLP as CoRR abs/1702.04825.
% journal/volume are kept so classic styles still have the fields an article
% entry requires; eprint/archiveprefix expose the same arXiv identifier in
% structured form for eprint-aware styles.
@article{SinglaH017,
  author        = {Singla, Adish and
                   Hassani, Seyed Hamed and
                   Krause, Andreas},
  title         = {Learning to Use Learners' Advice},
  journal       = {CoRR},
  volume        = {abs/1702.04825},
  year          = {2017},
  eprint        = {1702.04825},
  archiveprefix = {arXiv},
}

% NeurIPS 2020 paper on model selection in contextual stochastic bandit problems.
% The venue acronym is double-braced, like the other conference entries in this
% file, so that no style can change its capitalisation.
@inproceedings{PacchianoPA0ZLS20,
  author    = {Pacchiano, Aldo and
               Phan, My and
               Abbasi{-}Yadkori, Yasin and
               Rao, Anup and
               Zimmert, Julian and
               Lattimore, Tor and
               Szepesv{\'{a}}ri, Csaba},
  title     = {Model Selection in Contextual Stochastic Bandit Problems},
  booktitle = {{NeurIPS}},
  year      = {2020},
}

% arXiv preprint (2020), indexed as CoRR abs/2006.05491.
% journal/volume are kept so classic styles still have the fields an article
% entry requires; eprint/archiveprefix expose the same arXiv identifier in
% structured form for eprint-aware styles.
@article{AbbasiYadkori2020regrebalancing,
  author        = {Abbasi{-}Yadkori, Yasin and
                   Pacchiano, Aldo and
                   Phan, My},
  title         = {Regret Balancing for Bandit and {RL} Model Selection},
  journal       = {CoRR},
  volume        = {abs/2006.05491},
  year          = {2020},
  eprint        = {2006.05491},
  archiveprefix = {arXiv},
}

% arXiv preprint (2020), indexed as CoRR abs/2012.13045.
% journal/volume are kept so classic styles still have the fields an article
% entry requires; eprint/archiveprefix expose the same arXiv identifier in
% structured form for eprint-aware styles.
@article{pacchiano2020regret,
  author        = {Pacchiano, Aldo and
                   Dann, Christoph and
                   Gentile, Claudio and
                   Bartlett, Peter L.},
  title         = {Regret Bound Balancing and Elimination for Model Selection in Bandits
                   and {RL}},
  journal       = {CoRR},
  volume        = {abs/2012.13045},
  year          = {2020},
  eprint        = {2012.13045},
  archiveprefix = {arXiv},
}