Malaria-network-exposure / 00_functions.R
00_functions.R
Raw

#################################################################
###    AUXILIARY FUNCTIONS USED IN VOROS, BELLOTTI ET AL.    ####
#################################################################



### CALCULATE HOUSEHOLD-LEVEL VARIABLES EXCLUDING RESPONDENT

household.aggregate <- function(data, hh_id, resp_id, agg_varname,
                                hh_varname='Household', resp_varname='ID', agg_fun='max') {
  
  reduced <- data[,colnames(data) %in% c(hh_varname, resp_varname, agg_varname)]
  reduced <- reduced[reduced[,hh_varname]==hh_id,]
  
  if(is.null(dim(reduced))) {
    ans <- 0
  } else {
    reduced <- reduced[reduced[,resp_varname]!=resp_id,agg_varname]
    if(sum(is.na(reduced))==length(reduced)) {
      ans <- 0
    } else {
      if(agg_fun=='max') ans <- max(reduced, na.rm=T)
      if(agg_fun=='mean') ans <- mean(reduced, na.rm=T)
      if(agg_fun=='min') ans <- min(reduced, na.rm=T)
    }
  }
  
  return(ans)
  
}




### RSIENA ESTIMATION TO CONVERGENCE (source: RSiena Manual, section 6.3.3)

siena07ToConvergence <- function(alg, dat, eff, ans0=NULL, ...){
  numr <- 0
  ans <- siena07(alg, data=dat, effects=eff, prevAns=ans0, ...) # the first run
  repeat {
    numr <- numr + 1                   # count number of repeated runs
    tmo <- ans$tconv.max               # overall convergence indicator
    tm <- ans$tmax                     # parameterwise convergence indicator
    cat(numr, tmo,"\n")                # report how far we are
    if (is.na(tmo) | is.na(tm)) {break}# collinear effects
    if (tmo < 0.25 & tm < 0.1) {break} # success
    if (tmo > 8 | tm > 8) {break}      # divergence without much hope
    # of returning to good parameter values
    if (numr > 20) {break}             # now it has lasted too long
    ans <- siena07(alg, data=dat, effects=eff, prevAns=ans, ...)
  }
  if (is.na(tmo) | is.na(tm))
  {
    cat("Warning: collinear effects.\n")
  } else {
    if (tmo > 0.25 | tm > 0.1) cat("Warning: convergence inadequate.\n")
  }
  ans
}




### RSIENA AUXILIARY GOODNESS OF FIT FUNCTIONS (source: RSiena documentation)

# Geodesic distance distribution
GeodesicDistribution <- function (i, data, sims, period, groupName,
                                  varName, levls=c(1:5,Inf), cumulative=TRUE, ...) {
  x <- networkExtraction(i, data, sims, period, groupName, varName)
  require(network)
  require(sna)
  a <- sna::geodist(symmetrize(x))$gdist
  if (cumulative)
  {
    gdi <- sapply(levls, function(i){ sum(a<=i) })
  }
  else
  {
    gdi <- sapply(levls, function(i){ sum(a==i) })
  }
  names(gdi) <- as.character(levls)
  gdi
}

# Clique census
CliqueCensus <- function (i, obsData, sims, period, groupName, varName, levls = 1:5){
  require(sna)
  x <- networkExtraction(i, obsData, sims, period, groupName, varName)
  cc0 <- sna::clique.census(x, mode='graph', tabulate.by.vertex = FALSE,
                            enumerate=FALSE)[[1]]
  cc <- 0*levls
  names(cc) <- as.character(levls)
  levels.used <- as.numeric(intersect(names(cc0), names(cc)))
  cc[levels.used] <- cc0[levels.used]
  cc
}

# modification of the mixed triad census of Hollway et al. (2017), 
# based on the RSiena source code; this version of the census only counts triads
# that express some form of agreement between the two actors
mixedTriadCensus.agree <- function (i, obsData, sims, period, groupName, varName) 
{
  if (length(varName) != 2) 
    stop("mixedTriadCensus expects two varName parameters")
  varName1 <- varName[1]
  varName2 <- varName[2]
  m1 <- as.matrix(sparseMatrixExtraction(i, obsData, sims, 
                                         period, groupName, varName = varName1))
  m2 <- as.matrix(sparseMatrixExtraction(i, obsData, sims, 
                                         period, groupName, varName = varName2))
  if ((dim(m1)[1] != dim(m1)[2]) | (dim(m1)[1] != dim(m2)[1])) {
    stop("Error: The first element in varName must be one-mode, the second two-mode")
  }
  cp <- function(m) (-m + 1)
  onemode.reciprocal <- m1 * t(m1)
  onemode.forward <- m1 * cp(t(m1))
  onemode.backward <- cp(m1) * t(m1)
  onemode.null <- cp(m1) * cp(t(m1))
  diag(onemode.forward) <- 0
  diag(onemode.backward) <- 0
  diag(onemode.null) <- 0
  bipartite.twopath <- m2 %*% t(m2)
  bipartite.null <- cp(m2) %*% cp(t(m2))
  bipartite.onestep1 <- m2 %*% cp(t(m2))
  bipartite.onestep2 <- cp(m2) %*% t(m2)
  diag(bipartite.twopath) <- 0
  diag(bipartite.null) <- 0
  diag(bipartite.onestep1) <- 0
  diag(bipartite.onestep2) <- 0
  res <- c(`22` = sum(onemode.reciprocal * bipartite.twopath)/2, 
           `21` = (sum(onemode.forward * bipartite.twopath) + 
                     sum(onemode.backward * bipartite.twopath))/2, 
           `20` = sum(onemode.null * bipartite.twopath)/2, 
           # `12` = (sum(onemode.reciprocal * bipartite.onestep1) + 
           #           sum(onemode.reciprocal * bipartite.onestep2))/2, 
           # `11D` = (sum(onemode.forward * bipartite.onestep1) + 
           #            sum(onemode.backward * bipartite.onestep2))/2, 
           # `11U` = (sum(onemode.forward * bipartite.onestep2) + 
           #            sum(onemode.backward * bipartite.onestep1))/2, 
           # `10` = (sum(onemode.null * bipartite.onestep2) + 
           #           sum(onemode.null * bipartite.onestep1))/2, 
           `02` = sum(onemode.reciprocal * bipartite.null)/2, 
           `01` = (sum(onemode.forward * bipartite.null) + 
                     sum(onemode.backward * bipartite.null))/2, 
           `00` = sum(onemode.null * bipartite.null)/2)
  # dim1 <- dim(m2)[1]
  # dim2 <- dim(m2)[2]
  # nTriads <- dim2 * dim1 * (dim1 - 1)/2
  # if (sum(res) != nTriads) {
  #   stop(paste("Error in calculation. More than", nTriads, 
  #              "triads counted (", sum(res), ")"))
  #}
  res
}