% Lamberton and Pages (2008), Electron. J. Probab. vol. 13.
% The per-volume paper number is stored in `number`; `pages` holds only the page range.
@article{EJP489,
  author   = {Lamberton, Damien and Pag{\`e}s, Gilles},
  title    = {A penalized bandit algorithm},
  journal  = {Electron. J. Probab.},
  fjournal = {Electronic Journal of Probability},
  volume   = {13},
  number   = {13},
  pages    = {341--373},
  year     = {2008},
  keywords = {Two-armed bandit algorithm; penalization; stochastic approximation; convergence rate; learning},
  abstract = {We study a two armed-bandit recursive algorithm with penalty. We show that the algorithm converges towards its ``target'' although it always has a noiseless ``trap''. Then, we elucidate the rate of convergence. For some choices of the parameters, we obtain a central limit theorem in which the limit distribution is characterized as the unique stationary distribution of a Markov process with jumps.},
  issn     = {1083-6489},
  doi      = {10.1214/EJP.v13-489},
  url      = {http://ejp.ejpecp.org/article/view/489},
}