### LOAD REQUIRED PACKAGES AND AUXILIARY FUNCTIONS
library(openxlsx)
setwd(workdir)
source('00_functions.R')
###
### 1. LOAD DATA
setwd(datadir)
village.codes <- c('v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10')
village.names <- c('WK1', 'WK2', 'WK3', 'WJ1', 'WJ2', 'WJ3', 'SG1', 'SG2', 'SG3', 'SG4')
sheet.names <- c('edgelist', 'attributes')
data.file <- 'Raw data v2 anonymised.xlsx'
data.sheets <- matrix(c(rep(village.codes, each=2),
paste(rep(village.names, each=2), sheet.names),
rep(sheet.names, 10)), ncol=3)
for (i in 1:nrow(data.sheets)) {
sheet <- read.xlsx(data.file, sheet=data.sheets[i,2], rowNames=F, sep.names=' ')
object.name <- paste0(data.sheets[i,1], '.', data.sheets[i,3])
assign(object.name, eval(sheet))
}
rm(i, sheet, object.name, data.sheets)
###
### 2. TRANSFORM EDGELISTS TO ADJACENCY MATRICES
for (i in 1:length(village.names)) {
edge.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
edge <- eval(parse(text=edge.obj))
attr <- eval(parse(text=attr.obj))
matr.obj <- paste0('v', i, '.adjmatrix')
matr <- matrix(0, nrow(attr), nrow(attr))
rownames(matr) <- attr$ID
colnames(matr) <- attr$ID
for (j in 1:nrow(edge)) {
matr[rownames(matr)==edge[j,1], colnames(matr)==edge[j,2]] <- 1
}
assign(matr.obj, eval(matr))
}
rm(i, j, edge.obj, attr.obj, edge, attr, matr.obj, matr)
###
### 3. REMOVE REDUNDANT EDGELIST OBJECTS
rm(list=ls(pattern='edgelist'))
sheet.names[1] <- 'adjmatrix'
###
### 4. DEFINE THREE TYPES OF OUTDEGREES
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
# IDs of respondents, non-respondents, and those from external villages
id.resp <- attr$ID[attr$Interviewed=='Yes']
id.nonresp <- attr$ID[attr$Interviewed=='No' & attr$Village==village.names[i]]
id.ext <- attr$ID[attr$Interviewed=='No' & attr$Village!=village.names[i]]
# calculate different outdegrees
odeg.to.resp <- rowSums(matr[,colnames(matr) %in% id.resp])
odeg.to.nonresp <- rowSums(matr[,colnames(matr) %in% id.nonresp])
odeg.to.ext <- rowSums(matr[,colnames(matr) %in% id.ext])
# add outdegree columns to attributes object
attr$'Outdegree to respondents' <- odeg.to.resp
attr$'Outdegree to non-respondents' <- odeg.to.nonresp
attr$'Outdegree out of village' <- odeg.to.ext
assign(attr.obj, eval(attr))
}
rm(i, attr.obj, attr, matr.obj, matr, id.resp, id.nonresp, id.ext,
odeg.to.resp, odeg.to.nonresp, odeg.to.ext)
###
### 5. DROP NON-RESPONDENTS AND PEOPLE FROM OTHER VILLAGES
# exception: people in WJ1 talk to healer of WJ2
# make sure WJ2 healer kept in WJ1
# copy their measure use data from WJ2 to WJ1 for later steps
which.healer <- v4.attributes$ID[v4.attributes$Village %in% 'WJ2' &
v4.attributes$`Role in the village` %in% 'Traditional Healer']
v4.attributes[v4.attributes$ID==which.healer,] <- v5.attributes[v5.attributes$ID==
which.healer,]
rm(which.healer)
# drop unnecessary people
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
ids.keep <- attr$ID[attr$Interviewed=='Yes']
matr <- matr[rownames(matr) %in% ids.keep, colnames(matr) %in% ids.keep]
attr <- attr[attr$ID %in% ids.keep,]
assign(matr.obj, eval(matr))
assign(attr.obj, eval(attr))
}
rm(i, matr.obj, attr.obj, ids.keep, matr, attr)
###
### 6. DEFINE INDEGREE VARIABLE
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
indeg <- colSums(matr)
attr$Indegree <- indeg
assign(attr.obj, eval(attr))
}
rm(i, matr.obj, attr.obj, indeg, matr, attr)
###
### 7. DEFINE ASHA- AND HEALER-RELATED VARIABLES
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
asha <- ifelse(attr$`Role in the village` %in% 'ASHA', 1, 0)
healer <- ifelse(attr$`Role in the village` %in% 'Traditional Healer', 1, 0)
# if there are more than one interviewed Healer, keep the one with highest indegree
# (two villages have two - WK1: 5-4 indegree, SG2: 27-3 indegree)
if (sum(healer)>1) {
healer[healer==1 & attr$Indegree<max(attr$Indegree[healer==1])] <- 0
}
asha.talk <- matr[, asha==1]
if (sum(healer)==1) {
healer.talk <- matr[, healer==1]
} else {
healer.talk <- rep(0, nrow(matr))
}
attr$ASHA <- asha
attr$'Traditional Healer' <- healer
attr$'Talks to ASHA' <- asha.talk
attr$'Talks to Healer' <- healer.talk
assign(attr.obj, eval(attr))
}
rm(i, matr.obj, attr.obj, asha, healer, asha.talk, healer.talk, matr, attr)
###
### 8. RECODE VARIABLES
for (i in 1:length(village.names)) {
attr.obj <- paste0('v', i, '.', sheet.names[2])
attr <- eval(parse(text=attr.obj))
# measure use frequencies to yes-no
attr[attr=='Always'] <- 'Yes'
attr[attr=='Sometimes'] <- 'Yes'
attr[attr=='Rarely'] <- 'No'
attr[attr=='Never'] <- 'No'
# all yes-no to 1-0
attr[attr=='Yes'] <- 1
attr[attr=='No'] <- 0
# female - 1, male - 0
colnames(attr)[colnames(attr)=='Gender'] <- 'Female'
attr[attr=='Female'] <- 1
attr[attr=='Male'] <- 0
# education to levels, 0 - no schooling
colnames(attr)[colnames(attr)=='Level of education'] <- 'Education'
attr[attr=='No Schooling'] <- 0
attr[attr=='Below Primary'] <- 1
attr[attr=='Primary'] <- 2
attr[attr=='Middle'] <- 3
attr[attr=='Secondary'] <- 4
attr[attr=='Higher Secondary'] <- 5
attr[attr=='Graduate'] <- 6
# occupation to works in fields, 1- agricultural labor or cultivator
colnames(attr)[colnames(attr)=='Usual occupation'] <- 'Works in fields'
attr$`Works in fields` <- ifelse(attr$`Works in fields`=='Agricultural labor' |
attr$`Works in fields`=='Cultivator', 1, 0)
# remove unnecessary variables
attr <- attr[,!(colnames(attr) %in% c('Interviewed', 'Notes from the interview',
'Role in the village', 'Use of nets at night',
'Use mosquito mats'))]
# rename a few variables
colnames(attr)[colnames(attr)=='Head of Household'] <- 'Head of household'
colnames(attr)[colnames(attr)=='Indegree'] <- 'Network size in village (indegree)'
colnames(attr)[colnames(attr)=='Outdegree to respondents'] <- 'Network size in village'
colnames(attr)[colnames(attr)=='Outdegree to non-respondents'] <-
'Network size among non-respondents'
colnames(attr)[colnames(attr)=='Outdegree out of village'] <- 'Network size out of village'
colnames(attr)[colnames(attr)=='In charge of caring the sick'] <- 'Carer for a sick person'
colnames(attr)[colnames(attr)=='Nets with insecticide'] <- 'Use: LLINs'
colnames(attr)[colnames(attr)=='Covers up'] <- 'Use: Covering clothes'
colnames(attr)[colnames(attr)=='Use boots'] <- 'Use: Boots'
colnames(attr)[colnames(attr)=='Use gloves'] <- 'Use: Gloves'
colnames(attr)[colnames(attr)=='Use insecticide cream'] <- 'Use: Insecticide cream'
colnames(attr)[colnames(attr)=='Use coils'] <- 'Use: Coils'
colnames(attr)[colnames(attr)=='Use vaporizers'] <- 'Use: Vaporizers'
colnames(attr)[colnames(attr)=='Burn materials'] <- 'Use: Burning materials'
# make most variables numeric
attr[,!(colnames(attr) %in% c('ID', 'Village', 'Household'))] <-
as.numeric(unlist(attr[,!(colnames(attr) %in% c('ID', 'Village', 'Household'))]))
# save new data object
assign(attr.obj, eval(attr))
}
rm(i, attr.obj, attr)
###
### 9. DEFINE HOUSEHOLD VARIABLES
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
household <- substr(attr$ID, 1, 9)
attr$Household <- household
hh.asha.talk <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Talks to ASHA', agg_fun='max'))
hh.healer.talk <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Talks to Healer', agg_fun='max'))
hh.exp.llins <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: LLINs', agg_fun='max'))
hh.exp.cover <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Covering clothes', agg_fun='max'))
hh.exp.boots <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Boots', agg_fun='max'))
hh.exp.gloves <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Gloves', agg_fun='max'))
hh.exp.cream <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Insecticide cream',
agg_fun='max'))
hh.exp.coils <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Coils', agg_fun='max'))
hh.exp.evap <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Vaporizers', agg_fun='max'))
hh.exp.burn <- apply(attr, 1, FUN=function(x)
household.aggregate(attr, x['Household'], x['ID'], 'Use: Burning materials',
agg_fun='max'))
attr$'Household talks to ASHA' <- hh.asha.talk
attr$'Household talks to Healer' <- hh.healer.talk
attr$'Household exposure to LLINs' <- hh.exp.llins
attr$'Household exposure to Covering clothes' <- hh.exp.cover
attr$'Household exposure to Boots' <- hh.exp.boots
attr$'Household exposure to Gloves' <- hh.exp.gloves
attr$'Household exposure to Insecticide cream' <- hh.exp.cream
attr$'Household exposure to Coils' <- hh.exp.coils
attr$'Household exposure to Vaporizers' <- hh.exp.evap
attr$'Household exposure to Burning materials' <- hh.exp.burn
assign(attr.obj, eval(attr))
}
rm(i, matr.obj, attr.obj, household, hh.asha.talk, hh.healer.talk, matr, attr)
rm(list=ls(pattern='hh.exp'))
###
### 10. DEFINE NETWORK EXPOSURE VARIABLES
for (i in 1:length(village.names)) {
matr.obj <- paste0('v', i, '.', sheet.names[1])
attr.obj <- paste0('v', i, '.', sheet.names[2])
matr <- eval(parse(text=matr.obj))
attr <- eval(parse(text=attr.obj))
net.exp.llins <- colSums(attr[,'Use: LLINs']*t(matr), na.rm=T) / rowSums(matr)
net.exp.cover <- colSums(attr[,'Use: Covering clothes']*t(matr), na.rm=T) / rowSums(matr)
net.exp.boots <- colSums(attr[,'Use: Boots']*t(matr), na.rm=T) / rowSums(matr)
net.exp.gloves <- colSums(attr[,'Use: Gloves']*t(matr), na.rm=T) / rowSums(matr)
net.exp.cream <- colSums(attr[,'Use: Insecticide cream']*t(matr), na.rm=T) / rowSums(matr)
net.exp.coils <- colSums(attr[,'Use: Coils']*t(matr), na.rm=T) / rowSums(matr)
net.exp.evap <- colSums(attr[,'Use: Vaporizers']*t(matr), na.rm=T) / rowSums(matr)
net.exp.burn <- colSums(attr[,'Use: Burning materials']*t(matr), na.rm=T) / rowSums(matr)
# those with 0 outdegree have 0 exposure
net.exp.llins[is.nan(net.exp.llins)] <- 0
net.exp.cover[is.nan(net.exp.cover)] <- 0
net.exp.boots[is.nan(net.exp.boots)] <- 0
net.exp.gloves[is.nan(net.exp.gloves)] <- 0
net.exp.cream[is.nan(net.exp.cream)] <- 0
net.exp.coils[is.nan(net.exp.coils)] <- 0
net.exp.evap[is.nan(net.exp.evap)] <- 0
net.exp.burn[is.nan(net.exp.burn)] <- 0
attr$'Network exposure to LLINs' <- net.exp.llins
attr$'Network exposure to Covering clothes' <- net.exp.cover
attr$'Network exposure to Boots' <- net.exp.boots
attr$'Network exposure to Gloves' <- net.exp.gloves
attr$'Network exposure to Insecticide cream' <- net.exp.cream
attr$'Network exposure to Coils' <- net.exp.coils
attr$'Network exposure to Vaporizers' <- net.exp.evap
attr$'Network exposure to Burning materials' <- net.exp.burn
assign(attr.obj, eval(attr))
}
rm(i, matr.obj, attr.obj, matr, attr)
rm(list=ls(pattern='net.exp'))
###
### 11. CREATE OBJECTS USEFUL INFORMATION FOR LATER ANALYSES
# variable names by groups
vars.dep <- c('Use: LLINs', 'Use: Covering clothes',
'Use: Boots', 'Use: Gloves', 'Use: Insecticide cream',
'Use: Coils', 'Use: Vaporizers', 'Use: Burning materials')
vars.indiv <- c('Female', 'Head of household', 'Carer for a sick person',
'Works in fields', 'Age', 'Education')
vars.oplead <- c('Talks to ASHA', 'Talks to Healer',
'Household talks to ASHA', 'Household talks to Healer')
vars.netsize <- c('Network size in village', 'Network size among non-respondents',
'Network size out of village', 'Network size in village (indegree)')
vars.netexp <- c('Network exposure to LLINs', 'Network exposure to Covering clothes',
'Network exposure to Boots', 'Network exposure to Gloves',
'Network exposure to Insecticide cream', 'Network exposure to Coils',
'Network exposure to Vaporizers', 'Network exposure to Burning materials')
vars.hhexp <- c('Household exposure to LLINs', 'Household exposure to Covering clothes',
'Household exposure to Boots', 'Household exposure to Gloves',
'Household exposure to Insecticide cream', 'Household exposure to Coils',
'Household exposure to Vaporizers', 'Household exposure to Burning materials')
# which villages have a healer?
has.healer <- rep(NA, length(village.names))
for (i in 1:length(village.names)) {
attr.obj <- paste0('v', i, '.', sheet.names[2])
attr <- eval(parse(text=attr.obj))
has.healer[i] <- ifelse(sum(attr$`Traditional Healer`)>0, T, F)
}
rm(i, attr.obj, attr)
###
### 12. CREATE SINGLE ATTRIBUTES DATASET WITH ALL OF THE VILLAGES
# create single attribute dataset with ashas and healers
v.attributes.with.ashas.healers <- rbind(v1.attributes, v2.attributes, v3.attributes,
v4.attributes, v5.attributes, v6.attributes,
v7.attributes, v8.attributes, v9.attributes,
v10.attributes)
# remove duplicate row of WJ2 healer in WJ1
v.attributes.with.ashas.healers <-
v.attributes.with.ashas.healers[!duplicated(v.attributes.with.ashas.healers$ID, fromLast=T),]
# create attribute dataset without ashas and healers (to be analysed)
v.attributes <- v.attributes.with.ashas.healers
v.attributes <- v.attributes[v.attributes$ASHA!=1 & v.attributes$`Traditional Healer`!=1,]
###
### 13. SAVE DATA AS CSVs (IF REQUESTED) AND CLEAN UP WORKSPACE
if (save.data) {
# save single dataset without ashas and healers (used in logit analysis)
write.csv(v.attributes, file = 'attributes_analytical_sample.csv', row.names=F)
# save village datasets with ashas and healers (used in SAOM analysis)
s.names <- paste(rep(village.names, each=2), sheet.names[2:1], sep='_')
s.objects <- paste0(rep(village.codes, each=2), '.', sheet.names[2:1])
for (i in 1:length(s.objects)) {
towrite <- eval(parse(text=s.objects[i]))
write.csv(towrite,
file=paste0('v', sprintf("%02d", ceiling(i/2)), '_', s.names[i], '.csv'))
}
rm(i, s.names, s.objects, towrite)
}
# delete unnecessary objects
rm(household.aggregate, predictive.acc, reg.backward, siena07ToConvergence,
GeodesicDistribution, CliqueCensus, mixedTriadCensus.agree,
save.data)
# change back to working directory
setwd(workdir)