Section_4/RICBC_RI_SC_Choice_numerically_stable.R · Rational-Inattention-Discrete-Choice


BhatDistance <- function(p, q){
  # Bhattacharyya distance between two discrete probabilities on the same state
  # space. 
  out <- - log(sum(sqrt(p * q))) 
  return(out)
}





CalcChoiceProbsUnderRIWithShannon_numstable <- function(Omega, mu, lambda, 
                                              max.iter = 10^8, 
                                              precision = 10^(-16))
{
  
  # Description -------------------------------------------------------------
  
  # Omega is the matrix that describes the payoffs for each action in each state 
  # Columns = States, Rows = Actions 
  
  # mu: Prior distribution over States
  
  # lambda: Flow cost of learning, must be strictly positive! 
  # If too large(>20), adjust max.iter and precision! 
  
  # max.iter: maximal number of iterations 
  
  # precision: threshold for the sum of squared differences between the choice 
  # probabilities of two subsequent iterations so that iterations stop
  
  
  # Pre-Calculations --------------------------------------------------------
  nalt=nrow(Omega)
  nstates=ncol(Omega)
  maxl=c()
  Z_mat=matrix(nrow=nalt,ncol=nstates)
  G=Omega/lambda
  for (i in 1:ncol(G)){
    maxl[i]=max(G[,i])
    Z_mat[,i] <- exp(G[,i]-maxl[i])
    
  }

  # Transformation of the utility index matrix
  #Z_mat <- exp(Omega/lambda)
  
  N_actions <- dim(Omega)[1]
  N_states <- dim(Omega)[2]
  # CREATE A CHECK THAT DIMENSIONS ALL FIT!!!!!!!
  
  
  # Starting Guess ----------------------------------------------------------
  # Pick as a starting guess for the unconditional choice probs a distribution 
  # where the ex-ante optimal action is chosen with probability  
  # "optimal.action.weight". The objective is a better performance with high 
  # lambda values. 
  # 


  optimal.action.weight <- 0.75

  optimal.prior.choice <- which.max(Omega %*% mu)
  Start_Probs <- rep(0, N_actions)
  Start_Probs[optimal.prior.choice] <- optimal.action.weight
  Start_Probs[- optimal.prior.choice] <-
    (1 - optimal.action.weight) / ( N_actions -1)

  Start_CondProbs <- matrix(rep(Start_Probs, N_states), ncol = N_states)
  
  
  
  # Start_CondProbs <- matrix(1/N_actions, nrow = N_actions, ncol = N_states)
  # Start_Probs <- rep(1/N_actions, N_actions)
  
  
  
  # Auxiliary functions used for the iterations -----------------------------
  # Step 1: Calculation of conditional probabilities
  .step1 <- function(Current_Probs, Current_CondProbs){
    new_CondProbs <- Current_CondProbs  
    for(i in 1:N_actions){
      for(j in 1:N_states){
        D1 <- maxl[j]+ log(Current_Probs) %*% Z_mat[, j]
        new_CondProbs[i, j] <- log(Current_Probs[i]) + G[i,j] - D1 
        new_CondProbs[i, j] <-exp(new_CondProbs[i, j])
      }
    }
    return(new_CondProbs)
  }
  
  # Step 2: Calculation of actual (prior) choice probabilities
  .step2 <- function(Current_CondProbs){
    new_Probs <-Current_CondProbs %*% mu
    return(c(new_Probs))
  }
  
  # .iter: Takes a list that contains the current guess of 1) the choice probs 
  # and 2) the choice probs conditional on the state and calculates a new guess  
  # as its output. 
  
  .iter <- function(prob_list){
    old_Probs <- prob_list[[1]]
    old_CondProbs <- prob_list[[2]]
    
    new_CondProbs <-
      .step1(Current_Probs = old_Probs, Current_CondProbs = old_CondProbs)
    new_Probs <- .step2(Current_CondProbs = new_CondProbs)
    
    out <- list(new_Probs, new_CondProbs)
    return(out)
  }
  
  # Actual Iterations -------------------------------------------------------
  
  delta <-  1
  counter <- 0
  prob_list <- list(Start_Probs, Start_CondProbs)
  
  while(delta > precision && counter < max.iter){
    old_prob_list <- prob_list
    prob_list <- .iter(prob_list)
    # delta <- sum((old_prob_list[[1]] - prob_list[[1]])^2)
    # Bhattacharyya distance between old_prob_list[[1]] and prob_list[[1]]
    delta <- BhatDistance(old_prob_list[[1]], prob_list[[1]])    
    
    counter <- counter + 1
    
  }
  
  # print(counter)
  # print(delta)
  
  # print(paste0("Iterations until convergence: ", counter, "."))
  if(counter >= max.iter){
    print("NO CONVERGENCE!")
  }
  
  
  # Revealed Posterior Beliefs ----------------------------------------------
  post_beliefs <- sweep(prob_list[[2]], 1, prob_list[[1]], FUN = '/')
  post_beliefs <- sweep(post_beliefs, 2, mu, FUN = '*')
  
  prob_list[[3]] <- post_beliefs
  
  names(prob_list) <- 
    c("Choice Probabilities", "State Dependent Choice Probabilities", 
      "Revealed Posterior Beliefs")
  
  names(prob_list[[1]]) <- paste0(rep("Action ", N_actions), 1:N_actions)
  rownames(prob_list[[2]]) <- paste0(rep("Action ", N_actions), 1:N_actions, "|")
  colnames(prob_list[[2]]) <- paste0(rep("State ", N_states), 1:N_states)
  
  
  rownames(prob_list[[3]]) <- paste0(rep("Action ", N_actions), 1:N_actions, "|")
  colnames(prob_list[[3]]) <- paste0(rep("State ", N_states), 1:N_states)
  
  
  # Rounding of results (IS THIS OKAY? ESPECIALLY AS WE ARE INTERESTED 
  # IN Pr(a) = 0)! -------------------------------------
  prob_list[[1]] <- round(prob_list[[1]], 5)
  prob_list[[2]] <- round(prob_list[[2]], 5)
  prob_list[[3]] <- round(prob_list[[3]], 5)
  
  return(prob_list) 
}