# -*- coding: utf-8 -*-
"""Assignment3.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1rDTGiQpC4NrlCzreiQktEdl32mJOmgmR
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit  # numba.decorators is gone in recent numba; jit lives in the top-level package
from math import log


@jit
def bernoulli(mu_arm):
    # single Bernoulli reward with success probability mu_arm
    return np.random.binomial(1, mu_arm)

"""# UCB"""

@jit
def ucb(horizon, n_arms, true_mus):
    n = horizon
    m = n_arms
    assert len(true_mus) == m
    deltas = np.amax(true_mus) - true_mus  # sub-optimality gap of each arm
    mus = np.zeros(m)    # empirical means
    pulls = np.zeros(m)  # number of pulls of each arm

    # init by taking each arm once
    for t in range(m):
        pulls[t] += 1
        mus[t] = bernoulli(true_mus[t])

    # UCB loop
    for t in range(m, n):
        # compute the upper confidence bound of each arm
        ucbs = mus + np.sqrt(log(1 + t * log(t)**2) / (2 * pulls))
        # take action: pull the arm with the largest index
        i = np.argmax(ucbs)
        # update statistics with the observed reward
        pulls[i] += 1
        mus[i] = ((pulls[i] - 1) * mus[i] + bernoulli(true_mus[i])) / pulls[i]

    # return the pseudo-regret
    return np.sum(pulls * deltas)

"""# KL-UCB"""

@jit
def kl(x, y, eps=1e-15):
    # Bernoulli Kullback-Leibler divergence kl(x, y), with x and y clipped away from 0 and 1
    x = min(max(x, eps), 1 - eps)
    y = min(max(y, eps), 1 - eps)
    return x * log(x / y) + (1 - x) * log((1 - x) / (1 - y))


@jit
def solve_inner_max(mu, pull, t, eps=1e-4, max_iter=50):
    # bisection search for the KL-UCB index: the largest q in [mu, 1] such that
    # pull * kl(mu, q) <= log(1 + t * log(t)**2)
    l, r = mu, 1.0
    val = np.inf
    it = 0
    d = log(1 + t * log(t)**2) / pull
    mid = (l + r) / 2
    while np.abs(val) > eps and it < max_iter:
        mid = (l + r) / 2
        val = kl(mu, mid) - d
        if val > 0:
            r = mid
        else:
            l = mid
        it += 1
    return mid


@jit
def klucb(horizon, n_arms, true_mus):
    n = horizon
    m = n_arms
    assert len(true_mus) == m
    deltas = np.amax(true_mus) - true_mus  # sub-optimality gap of each arm
    mus = np.zeros(m)    # empirical means
    pulls = np.zeros(m)  # number of pulls of each arm

    # init by taking each arm once
    for t in range(m):
        pulls[t] += 1
        mus[t] = bernoulli(true_mus[t])

    # KL-UCB loop
    for t in range(m, n):
        # compute the Kullback-Leibler upper confidence bound of each arm
        ucbs = np.zeros(m)
        for j in range(m):
            ucbs[j] = solve_inner_max(mus[j], pulls[j], t)
        # take action: pull the arm with the largest index
        i = np.argmax(ucbs)
        # update statistics with the observed reward
        pulls[i] += 1
        mus[i] = ((pulls[i] - 1) * mus[i] + bernoulli(true_mus[i])) / pulls[i]

    # return the pseudo-regret
    return np.sum(pulls * deltas)


def expected_regret(method, mu1, delta, horizon=10000, n_arms=2, n_iter=100):
    # Monte-Carlo estimate of the expected regret for a two-armed Bernoulli
    # bandit with means (mu1, mu1 + delta)
    regret = 0
    for _ in range(n_iter):
        if method == 'ucb':
            regret += ucb(horizon, n_arms, np.array([mu1, mu1 + delta]))
        else:
            regret += klucb(horizon, n_arms, np.array([mu1, mu1 + delta]))
    return regret / n_iter

x = np.arange(-0.5, 0.5, 0.01)
y_ucb = np.zeros(len(x))
y_klucb = np.zeros(len(x))
for i in range(len(x)):
    if i % 10 == 0:
        print('computing step ', i, ' ...')
    y_ucb[i] = expected_regret('ucb', 0.5, x[i])
    y_klucb[i] = expected_regret('klucb', 0.5, x[i])

plt.plot(x, y_ucb, label='ucb')
plt.plot(x, y_klucb, label='kl-ucb')
plt.legend()
plt.title(r'KL-UCB vs UCB for $\mu_1 = 0.5$')
plt.show()

x = np.arange(-0.1, 0.9, 0.01)
y_ucb = np.zeros(len(x))
y_klucb = np.zeros(len(x))
for i in range(len(x)):
    if i % 10 == 0:
        print('computing step ', i, ' ...')
    y_ucb[i] = expected_regret('ucb', 0.1, x[i])
    y_klucb[i] = expected_regret('klucb', 0.1, x[i])

plt.plot(x, y_ucb, label='ucb')
plt.plot(x, y_klucb, label='kl-ucb')
plt.legend()
plt.title(r'KL-UCB vs UCB for $\mu_1 = 0.1$')
plt.show()

x = np.arange(-0.9, 0.1, 0.01)
y_ucb = np.zeros(len(x))
y_klucb = np.zeros(len(x))
for i in range(len(x)):
    if i % 10 == 0:
        print('computing step ', i, ' ...')
    y_ucb[i] = expected_regret('ucb', 0.9, x[i])
    y_klucb[i] = expected_regret('klucb', 0.9, x[i])

plt.plot(x, y_ucb, label='ucb')
plt.plot(x, y_klucb, label='kl-ucb')
plt.legend()
plt.title(r'KL-UCB vs UCB for $\mu_1 = 0.9$')
plt.show()
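
"""# Sanity check for the KL-UCB index"""

# A minimal sketch of a sanity check for the bisection in solve_inner_max,
# assuming the functions defined above. The inputs mu = 0.3, pull = 50 and
# t = 1000 are arbitrary example values, not part of the assignment. The
# returned index q should lie in [mu, 1], and kl(mu, q) should match the
# exploration budget log(1 + t*log(t)**2) / pull up to the tolerance eps.
mu_check, pull_check, t_check = 0.3, 50, 1000
q_check = solve_inner_max(mu_check, pull_check, t_check)
budget = log(1 + t_check * log(t_check)**2) / pull_check
print('KL-UCB index q:', q_check)
print('kl(mu, q):', kl(mu_check, q_check), ' vs exploration budget:', budget)
assert mu_check <= q_check <= 1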