library(tidyverse)
library(haven)
library(webshot2)
library(survey)
#library(expss)
library(gt) # must be loaded after expss otherwise tables crash
# setwd("E:\\Dropbox\\Projects\\Gaza\\Data\\Final 1" #This does not work when using markdown. Data must be in project directory
agebounds<-function(cuts){
# create typical age labels from a cut string
out<-character()
for (i in 1:(length(cuts)-1)) {
lower<-cuts[i]+1
if ((is.infinite(lower)) || (lower<0)) lower<-0
upper<-cuts[i+1]
if (lower<10) strlower<-paste(" ",lower,sep="") else strlower<-paste(lower)
if (i<length(cuts)-1) {out<-append(out,paste(strlower,"-",upper,sep=''))}
else {out<-append(out,paste(lower,"+",sep=''))}
}
return(out)
}
toString<-function(x,str) {
# Stupid function to put in labels in tables (alternative to using factors or strings the whole way)
# Replaces numbers with corresponding strings
max<-length(x)
out<-vector()
for (i in 1:max) {
value<-x[i]
replacement<-str[value]
out<-append(out,replacement)
}
return(out)
}
saveTable<-function(table) {
# Saves a gt table in two formats
fname<-deparse(match.call()$table)
gtsave(table,paste(fname,".docx",sep=""))
gtsave(table,paste(fname,".pdf",sep=""))
}
#### Load files #####
#Load the roster file and, for weighting, population data from
#the US Census Bureau. https://www.census.gov/data-tools/demo/idb/#/pop?dashboard_page=country&COUNTRY_YR_ANIM=2025&CCODE=XG&popPages=BYAGE&menu=popViz&show_countries=n&CCODE_SINGLE=XG&subnat_map_admin=ADM1&POP_YEARS=2022
maindata<-"GMS Household roster.sav" # note this file has Khan Younis and Deir al Balah switched in order as in the questionnaire
birthsdata="GMS Births.sav"
s_govnames<-c("North Gaza"=1,"Gaza City"=2,"Deir al Balah"=3,"Khan Younis"=4,"Rafah"=5)
fates1 <-
haven::read_sav(maindata)
# recode governorates to reduce my confusion about order
fates1<-fates1 %>% mutate(HR00=case_when(HR00==1~1,
HR00==2~2,
HR00==3~4,
HR00==4~3,
HR00==5~5))
#val_lab(fates1$HR00)<-s_govnames
# Births
births<-
haven::read_sav(birthsdata)
# recode governorates to reduce my confusion about order
births<-births %>% mutate(HR00=case_when(HR00==1~1,
HR00==2~2,
HR00==3~4,
HR00==4~3,
HR00==5~5))
#val_lab(births$HR00)<-s_govnames
numbirths=nrow(births)
numbirths
birthsex<-table(births$HR04)
Population <- readxl::read_excel("Population Gaza Single year age groups IDB_2023.xlsx")
# From original IDB download: Changed 100+ to 100, changed variable names
View(Population)
# Calculate adjustment to population at risk
# Needs to be done on all marginals
USCBPop23<-Population$Total[1] # Total population
Persons.in.roster.file<-nrow(fates1)
AverageExpansion<-USCBPop23/Persons.in.roster.file
AverageExpansion
birthsex<-round(birthsex*AverageExpansion)
TotalAtRisk<-USCBPop23+sum(birthsex) #using estimated births, rather than projected (due to uncertainty in projected due to conflict)
USCBPopAtRisk<-TotalAtRisk
USCBScaleFactor<-USCBPopAtRisk/USCBPop23
USCBScaleFactor
# The births will be distributed to boys and girls at 0 age.
Population$Total[1]<-TotalAtRisk
Population$Male[1]<-birthsex[1]+Population$Male[1]
Population$Female[2]<-birthsex[2]+Population$Female[2]
Population$Male[2]<-birthsex[1]+Population$Male[2]
Population$Female[1]<-birthsex[2]+Population$Female[1]
#Calculate weights for each person in the roster file
#(now called "fates") so that the percentages in the demographic
#categories listed below match across fates and the estimates of
#the Census Bureau, adjusted for new births
agebreaks<-c(-1, 4, 11, 17,29, 39, 64, Inf)
agestr<-agebounds(agebreaks)
Population$AgeGroups<-as.integer(cut(as.numeric(Population$GROUP), breaks = agebreaks)) #Note change to 0 to 4
AgeGroups<-as.data.frame(group_by(Population,AgeGroups) %>% summarise(Males=sum(Male),
Females=sum(Female)))
AgeGroups<-AgeGroups %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
.default=agestr[AgeGroups]),
AgeGroups=case_when(is.na(AgeGroups)~100,
.default=AgeGroups))
AgeGroups
AgeGroups$PctFem<-AgeGroups$Females/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
AgeGroups$PctMal<-AgeGroups$Males/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
IDBAgeTable <- AgeGroups %>% select(AgeGr,Males,PctMal, Females,PctFem)%>% gt()%>%
tab_header(
title = "Gaza Strip population at risk 2023, broad age groups",
subtitle = "based on the International Data Base, U..S Census Bureau"
) %>% tab_source_note(
source_note = "Percentages are based on the total population"
) %>% tab_source_note(
source_note = "Births during survey period added"
) %>% tab_source_note(
source_note = "Source: https://www.census.gov/ International Programs Main/ Data /International Database."
) %>%
tab_source_note(
source_note = "Downloaded 25.01.2025"
) %>%
cols_label(
AgeGr="Age", PctMal="Male percent", PctFem="Female percent"
) %>%
fmt_number(n_sigfig = 3) %>%
fmt_number(
columns=c(PctMal,PctFem),
decimals=1
)
print(IDBAgeTable)
saveTable(IDBAgeTable)
#### Marginals ####
# Construct marginals
IDBGenderMarginal<-data.frame(HR04=c(1,2), Freq=c(AgeGroups$Males[nrow(AgeGroups)],AgeGroups$Females[nrow(AgeGroups)])) # IDB Gender
IDBGenderMarginal
IDBTotalAge<-AgeGroups$Males[-nrow(AgeGroups)]+ AgeGroups$Females[-nrow(AgeGroups)]
IDBTotalAgeMarginal<-data.frame(AgeGroups=seq(1,length(IDBTotalAge)),Freq=IDBTotalAge)
IDBTotalAgeMarginal
sum(IDBTotalAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq) # This should be true
# Also construct a marginal that is a blend of age and gender - to get that exact
IDBMales<-data.frame(AgeSex=paste(agestr,"Male"),Freq=AgeGroups$Males[-nrow(AgeGroups)])
IDBFemales<-data.frame(AgeSex=paste(agestr,"Female"),Freq=AgeGroups$Females[-nrow(AgeGroups)])
IDBSeparateSexesAndAgeMarginal<-bind_rows(IDBMales,IDBFemales)
IDBSeparateSexesAndAgeMarginal
sum(IDBSeparateSexesAndAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq) # This should be true
#### Age gender table ####
# Age Gender table from roster file
fates<-rbind(fates1,births)
fates$AgeGroups<-as.integer(cut(as.numeric(fates$HR03), breaks = agebreaks))
fates<-fates %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
.default=agestr[AgeGroups]),
Gender=case_when(HR04==1~'Male',
HR04==2~'Female'))
fates$AgeSex<-paste(fates$AgeGr,fates$Gender,sep=" ")
RosterAge<-fates %>% group_by(AgeGr,Gender) %>% summarize(sumpop=n())
RosterAge<-reshape(as.data.frame(RosterAge),idvar="AgeGr", timevar="Gender",direction="wide")
RosterAge
names(RosterAge)<-c('AgeGr','FemaleRoster','MaleRoster')
RosterAge<-rbind(RosterAge,data.frame(AgeGr='Total',FemaleRoster=sum(RosterAge$FemaleRoster),MaleRoster=sum(RosterAge$MaleRoster)))
RosterAge$PctFemRoster<-RosterAge$FemaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge$PctMalRoster<-RosterAge$MaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge
SurveyAgeTable <- RosterAge %>% select(AgeGr,MaleRoster,PctMalRoster, FemaleRoster,PctFemRoster) %>%
gt()%>%
tab_header(
title = "Household members October 6, 2023 and births in survey period by broad age groups" ) %>%
tab_source_note(source_note = "Unweighted. Percentages are based on the total") %>%
tab_source_note(source_note = paste("Based on ",maindata)) %>%
cols_label(
AgeGr="Age",
MaleRoster="Male",PctMalRoster="Percent male", FemaleRoster="Females",PctFemRoster="Percent female"
) %>%
fmt_number(
columns=c(PctMalRoster,PctFemRoster),
decimals=1
)
print(SurveyAgeTable)
saveTable(SurveyAgeTable)
#### Governorate marginals ####
# Construct governorate marginals
# Since the adjusted weights also should reflect what we think is reality, not only the sampling
# we use governorate marginals adjusted to the IDB Census Bureau population.
#Governorate PCBS 2023 IDB adjusted Questionnaire code Logical code PSUs
#North Gaza 444412 418833 1 1 36
#Gaza City 749100 705983 2 2 72
#Deir al Balah 319208 300835 4 3 28
#Khan Younis 438557 413315 3 4 42
#Rafah 275267 259423 5 5 22
# Note that the scale factor for new births is applied
governoratemarginals<-data.frame(HR00=seq(1,5,1), Freq=round(c( 418833,705983, 300835,413315, 259423)*USCBScaleFactor ))
#val_lab(governoratemarginals$HR00)<-s_govnames
governoratemarginals
# Note that governorate marginals are used on governorate of origin, not of interview.
# Construct household size marginals
# Note that we have to scale to current population and since we are using the roster file
# we have to use marginals that are population in each size group
householdsize2017 <-c(10661,31474,34084,43481,49305, 50239,42741,30693,19041,(10547 + 12444))
hhsizes<-c(seq(1,9,1),11.43151668)
popbyhouseholdsize2017<-householdsize2017*hhsizes
scalefactor2017to2023<-sum(IDBGenderMarginal$Freq)/sum(popbyhouseholdsize2017)
householdsizemarginals2017<-popbyhouseholdsize2017*scalefactor2017to2023
householdsizemarginals2017<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginals2017)
householdsizemarginals2017
# Here from MICS 2019-2020 household file (Downloaded from https://mics.unicef.org/surveys)
#the initial data are persons in each household size group
hhsizemarginalsMICS<-c(110,
501,
1069,
1841,
2458,
3218,
3289,
2255,
1816,
2502) # From 1 to 10+
MICSScalefactor<-sum(IDBGenderMarginal$Freq)/sum(hhsizemarginalsMICS)
householdsizemarginalsMICS<-hhsizemarginalsMICS*MICSScalefactor
householdsizemarginalsMICS<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginalsMICS)
householdsizemarginalsMICS
# Difference between distributions based on 2017 and MICS
round(householdsizemarginals2017$Freq-householdsizemarginalsMICS$Freq)
sum(householdsizemarginals2017$Freq)-sum(householdsizemarginalsMICS$Freq) # Should be 0
# get household sizes into the data file
fates$onedummy<-1 # simple counting variable that is also used initially for weights
# fates$Total<-1
pop23<-AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total']
fates$simpleExpand<-pop23/nrow(fates)
hsiz=aggregate(fates$onedummy,list(fates$prikey),sum)
names(hsiz)<-c("prikey","hsize")
fates<-left_join(fates,hsiz,by="prikey")
fates$hsize10<-ifelse(fates$hsize<10,fates$hsize,10)
##### Construct survey design ####
#Construct strata, weights. Also add variables for sample margins, and survey
#design
# Constructed total marginals are:
# AgeSexmatches IDBSeparateSexesAndAgeMarginal
# AgeGroups matches IDBTotalAgeMarginal
# HR04 matches IDBGenderMarginal
# AgeSex matches IDBSeparateSexesAndAgeMarginal
# hsize10 matches householdsizemarginalsMICS
# hsize10 matches householdsizemarginals2017
# Some useful strings
s_mf<-c("Male", "Female")
s_status<-c("Resident","Left the Gaza Strip","Elsewhere in the Gaza Strip","Dead","Missing","Imprisoned" )
s_cause_dead_full<-c("Disease etc","Disease etc, no aid","Accident","Violent", "Not known")
s_Cause_dead_short<-c("Total violent deaths","Total nonviolent deaths")
s_governorates<-c("Northern Gaza","Gaza city","Deir al-Balah","Khan Younis","Rafah")
s_maincat<-c("Children (<18)","Women (18-64)","Elderly (65+)","Men (18-64)")
s_WEC<-"Women, elderly, and children"
#### Category coding ####
# categories for reporting - should be done before design definitions since design incorporates data
fates<- fates %>% mutate(Categories=case_when(HR07< 18 ~ 1,
HR04==2 & HR07>17 & HR07<65 ~ 2,
HR07>64 ~3,
HR04==1 & HR07>17 & HR07<65 ~ 4),
WEC=case_when(is.na(HR07)~0,
HR04==2 | (HR07<18 | HR07>64)~1,
HR04==1 & (HR07>17 & HR07<65)~0),
WECmiss=case_when(HR04==2 | (HR03<18 | HR03>64)~1,
HR04==1 & (HR03>17 & HR03<65)~0),
Allmiss=if_else(HR05==5,1,0),
CatMiss=case_when(HR03< 18 ~ 1,
HR04==2 & HR03>17 & HR03<65 ~ 2,
HR03>64 ~3,
HR04==1 & HR03>17 & HR03<65 ~ 4),
typedeath=case_when(HR06==4~1,
HR06<4 ~2),
Dead=if_else(HR05==4,1,0),
Violent=if_else(HR05==4& HR06==4,1,0),
Nonviolent=if_else(HR05==4 & HR06<4,1,0),
Missing=if_else(HR05==5,1,0))
fates<- fates %>% mutate(ratedenominator = case_when(HR01>50 & (HR05==4 | HR05==2)~0.25,
HR01>5~0.5,
HR05==1~1,
HR05==2~0.5,
HR05==3~1,
HR05==4~0.5,
HR05==5~1,
HR05==6~1))
fates$stratum<-fates$HR00*10+fates$PSUType
uwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy) # uw prefix means unweighted
tbs<-svytable(~HR05+HR04,uwdesign)
ftable(tbs)
# The following should match (simple unweighted table)
xtabs(~HR05+HR04,fates)
#
# Full raked design
rakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
svytable(~HR04+HR05,rakedesign)
svyby(~onedummy,~HR04,design=rakedesign,svytotal)
#
# Check weights
#
weights<-weights(rakedesign)
boxplot(weights)
summary(weights)
# This is a bit high variation
# so let us fix that
trimmedrakedesign<-trimWeights(rakedesign, upper=median(weights*2), lower=90)
weights<-weights(trimmedrakedesign)
boxplot(weights)
summary(weights)
# Export data file
GMS_Used_data<-model.frame(trimmedrakedesign)
GMS_Used_data<-cbind(GMS_Used_data,weights)
saveRDS(GMS_Used_data,file="GMS_at_risk_apr_18_2025.rds")
rm(GMS_Used_data)
#### Main table ####
# Master table - trimmed rakeddesign
currentdesign<-trimmedrakedesign
tbl<-round(svyby(~onedummy,~HR05+~HR04,design=currentdesign,svytotal,vartype=c("se","ci")))
tbl
tbl$HR05<-toString(tbl$HR05,s_status)
tbl$HR04<-toString(tbl$HR04,s_mf)
names(tbl)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tbl
tblT<-round(svyby(~onedummy,~HR05,design=currentdesign,svytotal,vartype=c("se","ci")))
tblT$HR05<-toString(tblT$HR05,s_status)
tblT$Gender<-rep("Total",length(s_status))
tblT<-tblT[,c("HR05","Gender","onedummy","se","ci_l","ci_u")]
tblT
names(tblT)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tblT
tblT<-bind_rows(tbl,tblT)
mainoutcomestable<-tblT%>%
group_by(Gender) %>%
gt(row_group_as_column = TRUE) %>%
tab_header(
title = "Gaza Strip Survey main status outcomes") %>%
tab_source_note(source_note ="Raked to age,gender, governorate, and household size") %>%
tab_source_note(source_note = paste("Based on Gaza Mortality Survey ",maindata)) %>%
cols_merge(
columns=c(ci_l,ci_u),
pattern="({1}, {2})")%>%
cols_label(se~"Standard<br>error",
ci_l~"Confidence<br>Interval",
.fn = md) %>%
fmt_number(n_sigfig = 3)
mainoutcomestable
saveTable(mainoutcomestable)
# main table of deaths
# Baseline deaths are IDB estimated deaths 2023 6348, 2024 6188,2025 6043 87 days in 2023
#https://www.census.gov/data-tools/demo/idb/#/table?dashboard_page=sources&COUNTRY_YR_ANIM=2023&CCODE_SINGLE=XG&subnat_map_admin=ADM1&CCODE=XG&COUNTRY_YEAR=2023&menu=tableViz&TABLE_YEARS=2023,2024,2025&TABLE_USE_RANGE=N&TABLE_USE_YEARS=Y&TABLE_STEP=1&TABLE_ADD_YEARS=2023,2024,2025&quickReports=CUSTOM&CUSTOM_COLS=CDR,DEATHS
# Downloaded 8.2.2025
# Median day of fieldwork is Jan 2, 2025
end <- as.Date("2023-12-31")
start <- strptime("2023-10-06", format="%Y-%m-%d", tz="UTC")
DaysIn2023<-as.numeric(difftime(as.POSIXct(end), as.POSIXct(start, tz="UTC"), units="days"))
DaysIn2025=2
baseline<-6348*DaysIn2023/365+6188+6043*DaysIn2025/365 # might be considered slightly misplaced concreteness
# Define a function to make it easier to repeat
#### Makemaintable function ####
makemaintable<-function(currentdesign,designdescription) {
#First select violent deaths - HR06==4
tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci"))) #age and gender
tb1$Categories<-toString(tb1$Categories,s_maincat)
tbwec<- svyby(~onedummy,~WEC,design=subset(currentdesign,HR06==4 & WEC==1),svytotal,vartype=c("se","ci"))
names(tbwec)[1]<-"Categories"
tbwec$Categories<-s_WEC
tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci"))) # violent / nonviolent
names(tb2)[1]<-"Categories"
tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
tb2
tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")) #Women,children, elderly, percent
tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
tb3<-data.frame(Categories=paste(s_WEC," (%)"),onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
tb3
tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci"))) #missing age and gender
names(tb4)[1]<-"Categories"
tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
tb4$Categories<-toString(tb4$Categories,s_maincat)
tb4
tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci"))) # missing
names(tb5)[1]<-"Categories"
tb5[1]<-"Total missing persons"
tb5
tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")) #Women,children, elderly, percent
tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
names(tb6)[1]<-"Categories"
tb6
# make a collected frame for the table
tb1$type<-"Violent deaths"
tb2$type<-"Violent deaths"
tb2$type[2]<-"Nonviolent deaths"
tb3$type<-"Violent deaths"
tb4$type<-"Missing persons"
tb5$type<-"Missing persons"
tb6$type<-"Missing persons"
tbwec$type<-"Violent deaths"
#
MF<-rbind(tb1,tbwec,tb2[1,],tb3)
eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
MF<-rbind(MF,tb2[2,],eframe)
MF<-rbind(MF,tb4, tb5,tb6)
MF
row_number(MF)
return(MF%>% select(type,Categories,onedummy,se,ci_l,ci_u) %>%
group_by(type) %>%
gt(row_group_as_column = TRUE) %>%
fmt_number(n_sigfig = 3)%>%
tab_header(
title = md("Estimates of violent, nonviolent mortality and missing persons<br>by broad age and gender groups")) %>%
tab_source_note(source_note =designdescription ) %>%
tab_source_note(source_note = paste("Based on ",maindata)) %>%
cols_merge(
columns=c(ci_l,ci_u),
pattern="({1}, {2})")%>%
cols_label(se~"Standard<br>error",
ci_l~"Confidence<br>Interval",
onedummy="Estimate",
.fn = md)
)
}
trimraketable<-makemaintable(trimmedrakedesign,"Raked to age,gender, governorate, and household size")
print(trimraketable)
saveTable(trimraketable)
# Make a table of deaths per governorate
tb1<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci"))) # all deaths by governorate
tb1$type<-"Total"
names(tb1)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb1$Governorate<-toString(tb1$Governorate,s_governorates)
tb1
tb2<-round(ftable(svytotal(~onedummy,design=subset(currentdesign,HR05==4,vartype=c("se","ci"))) )) #total deaths, all governorates
tb2ci<-ftable(round(confint(svytotal(~onedummy,design=subset(currentdesign,HR05==4) ))))
tb2<-data.frame(Governorate="Total",Estimate=tb2[1,1],se=tb2[1,2],ci_l=tb2ci[1,1],ci_l=tb2ci[1,2],type="Total")
names(tb2)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb2[1,1]<-"Total"
tb2
tb3<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci"))) # Total violent / nonviolent
tb3$type=c("Violent deaths","Nonviolent deaths")
names(tb3)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb3$Governorate<-"Total"
tb3
tb4<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==1),svytotal,vartype=c("se","ci"))) # violent
tb4$type<-"Violent deaths"
names(tb4)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb4$Governorate<-toString(tb4$Governorate,s_governorates)
tb4
tb5<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==2),svytotal,vartype=c("se","ci"))) # Nonviolent
tb5$type<-"Nonviolent deaths"
names(tb5)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb5$Governorate<-toString(tb5$Governorate,s_governorates)
tb5
# collect t he governorate table
govtab<-rbind(tb1,tb2,tb4,tb3[1,],tb5,tb3[2,])
governoratetable<-govtab %>% select(type,Governorate,Estimate,se,ci_l,ci_u) %>%
group_by(type) %>%
gt(row_group_as_column = TRUE) %>%
fmt_number(n_sigfig = 3)%>%
tab_header(
title = md("Estimates of violent, nonviolent mortality<br>by governorate of residence on October 6, 2023")) %>%
tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
tab_source_note(source_note = paste("Based on ",maindata)) %>%
cols_merge(
columns=c(ci_l,ci_u),
pattern="({1}, {2})")%>%
cols_label(se~"Standard<br>error",
ci_l~"Confidence<br>Interval",
.fn = md)
print(governoratetable)
saveTable(governoratetable)
#### Crude Death Rates #####
# calculate crude death rate, based on IDPpop
# Use numerators and denominator as defined in "category coding" above
ViolentCDR<-svyratio(~Violent,~ratedenominator,design=currentdesign)
NonviolentCDR<-svyratio(~Nonviolent,~ratedenominator,design=currentdesign)
TotalCDR<-svyratio(~Dead,~ratedenominator,design=currentdesign)
ViolentCDRCI<-confint(svyratio(~Violent,~ratedenominator,design=currentdesign))
NonviolentCDRCI<-confint(svyratio(~Nonviolent,~ratedenominator,design=currentdesign))
TotalCDRCI<-confint(svyratio(~Dead,~ratedenominator,design=currentdesign))
tabRatesA<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000*12/15,
SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000*12/5,
l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000*12/15,
u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000*12/15)
AnnualizedCDRTable<-tabRatesA %>% gt() %>%
fmt_number(n_sigfig = 3)%>%
tab_header(
title = md("Annualized mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
columns=c(l_ci,u_ci),
pattern="({1}, {2})")%>%
cols_label(SE~"Standard<br>error",
l_ci~"Confidence<br>Interval",
.fn = md)
print(AnnualizedCDRTable)
saveTable(AnnualizedCDRTable)
#
tabRates<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000,
SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000,
l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000,
u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000)
CDRTable<-tabRates %>% gt() %>%
fmt_number(n_sigfig = 3)%>%
tab_header(
title = md("Mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
columns=c(l_ci,u_ci),
pattern="({1}, {2})")%>%
cols_label(SE~"Standard<br>error",
l_ci~"Confidence<br>Interval",
.fn = md)
print(CDRTable)
saveTable(CDRTable)
# Sensitivity
makemaintableDF<-function(currentdesign) {
#First select violent deaths - HR06==4
tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci"))) #age and gender
tb1$Categories<-toString(tb1$Categories,s_maincat)
tb1
tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci"))) # violent / nonviolent
names(tb2)[1]<-"Categories"
tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
tb2
tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")) #Women,children, elderly, percent
tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
tb3<-data.frame(Categories=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
tb3
tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci"))) #missing age and gender
names(tb4)[1]<-"Categories"
tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
tb4$Categories<-toString(tb4$Categories,s_maincat)
tb4
tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci"))) # missing
names(tb5)[1]<-"Categories"
tb5[1]<-"Total missing persons"
tb5
tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")) #Women,children, elderly, percent
tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
names(tb6)[1]<-"Categories"
tb6
# make a collected frame for the table
tb1$type<-"Violent deaths"
tb2$type<-"Violent deaths"
tb2$type[2]<-"Nonviolent deaths"
tb3$type<-"Violent deaths"
tb4$type<-"Missing persons"
tb5$type<-"Missing persons"
tb6$type<-"Missing persons"
#Groupframe<-data.frame(Categories="Violent deaths", onedummy="",se="",ci_l="",ci_u="")
MF<-rbind(tb1,tb2[1,],tb3)
#Groupframe<-data.frame(Categories="Nonviolent deaths", onedummy="",se="",ci_l="",ci_u="")
eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
MF<-rbind(MF,tb2[2,],eframe)
#Groupframe<-data.frame(Categories="Missing persons", onedummy="",se="",ci_l="",ci_u="")
MF<-rbind(MF,tb4, tb5,tb6)
return(MF)
}
usedT<-cbind(makemaintableDF(trimmedrakedesign)[,1:3],data.frame(Raking=rep("1. Main model, full rake, trimmed weights",14)))
names(usedT)<-c("Categories","UsedE","UsedSE","Raking")
eqwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~simpleExpand)
eqd<-cbind(makemaintableDF(eqwdesign)[,1:3],data.frame(Raking=rep("7. Unadjusted equal probability",14)))
names(eqd)<-c("Categories","EQE","EQSE","Raking")
eqd
# Make no Gaza 9 team design. The idea is to make it as if gaza9 never was part of the sample, i.e., not subsetting and retaining sample structure.
# It is debatable what is best.But the choice only affects standard errors.
nog9fates<-subset(fates,Interviewer != "gaza9")
nog9design<-rake(svydesign(data=nog9fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
noGaza9<-cbind(makemaintableDF(nog9design)[,1:3],data.frame(Raking=rep("2. Team 9 excluded",14)))
names(noGaza9)<-c("Categories","noGE","noGSE","Raking")
# raked, no trim
noTrim<-cbind(makemaintableDF(rakedesign)[,1:3],data.frame(Raking=rep("4. Full rake, no weight trim",14)))
names(noTrim)<-c("Categories","noTrimE","noTrimSE", "Raking")
# No household adjustment
NoHHdesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, governoratemarginals))
NoHH<-cbind(makemaintableDF(NoHHdesign)[,1:3],data.frame(Raking=rep("6. No household size adjustment",14)))
names(NoHH)<-c("Categories","NOHHE","NOHH_SE","Raking")
NoHH
#
# Household adjustment using 2017 HHsizes
HHPDesign<-rake(svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00),
population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginals2017,governoratemarginals))
HH2017<-cbind(makemaintableDF(HHPDesign)[,1:3],data.frame(Raking=rep("3. Census 2017 household size adjustment",14)))
names(HH2017)<-c("Categories","HH2017_E","HH2017_SE","Raking")
# Use PCBS Population
PCBSPop23<-2226544+sum(birthsex)
PCBSAdjust=PCBSPop23/USCBPopAtRisk
# Adjust the marginals
PCBSTotalAgeMarginal<-IDBTotalAgeMarginal %>% mutate(Freq=Freq*PCBSAdjust)
PCBSGenderMarginal<-IDBGenderMarginal%>% mutate(Freq=Freq*PCBSAdjust)
PCBShouseholdsizemarginalsMICS<-householdsizemarginalsMICS%>% mutate(Freq=Freq*PCBSAdjust)
PCBSgovernoratemarginal<-governoratemarginals%>% mutate(Freq=Freq*PCBSAdjust)
# Full raked PCBS design
PCBSrakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(PCBSTotalAgeMarginal, PCBSGenderMarginal, PCBShouseholdsizemarginalsMICS,PCBSgovernoratemarginal))
PCBSPop<-cbind(makemaintableDF(PCBSrakedesign)[,1:3],data.frame(Raking=rep("5. PCBS population, full rake",14)))
names(PCBSPop)<-c("Categories","PCBSE","PCBS_SE","Raking")
# bind together
senstableDF<-cbind(usedT[1:3],eqd[,2:3],noGaza9[,2:3],noTrim[,2:3],NoHH[2:3],HH2017[2:3],PCBSPop[,2:3]) # Omit raking
senstableDF$type<-c(rep("Violent deaths",5),"Nonviolent deaths %",rep("Nonviolent deaths",2),rep("Missing persons",5),"Missing persons %")
senstableDF
senstab<-senstableDF %>% select(Categories,UsedE, UsedSE,noGE,noGSE,HH2017_E,HH2017_SE,noTrimE,noTrimSE,PCBSE,PCBS_SE,NOHHE,NOHH_SE,EQE,EQSE,type) %>%
group_by(type) %>%
gt(row_group_as_column = TRUE) %>%
opt_table_font(size=9) %>%
fmt_number(n_sigfig = 3)%>%
tab_header(
title = "Sensitivy of estimates to model assumptions") %>%
tab_source_note(source_note = paste("Based on ",maindata)) %>%
tab_spanner(
label = md("<br>Main model"),
columns = c(UsedE,UsedSE)
) %>%
tab_spanner(
label = md("Unadjusted<br>equal probability"),
columns = c(EQE,EQSE)) %>%
tab_spanner(
label = md("<br>Team 9 excluded"),
columns = c(noGE,noGSE)) %>%
tab_spanner(
label = md("Full rake,<br>no weight trim"),
columns = c(noTrimE,noTrimSE)) %>%
tab_spanner(
label = md("Census 2017<br>household size adjustment"),
columns = c(HH2017_E,HH2017_SE)) %>%
tab_spanner(
label = md("No houshold<br>size adjustment"),
columns = c(NOHHE,NOHH_SE)) %>%
tab_spanner(
label = md("PCBS population<br>Full rake"),
columns = c(PCBSE,PCBS_SE)) %>%
cols_label(UsedE="Estimate",
UsedSE~"SE",
EQE="Estimate",
EQSE~"SE",
noGE="Estimate",
noGSE~"SE",
noTrimE="Estimate",
noTrimSE~"SE",
NOHHE="Estimate",
NOHH_SE~"SE",
PCBSE="Estimate",
PCBS_SE="SE",
HH2017_E="Estimate",HH2017_SE="SE",
.fn = md) %>%
tab_footnote(
locations = cells_column_labels(c(UsedSE)),
footnote = "SE: Standard error")
senstab
saveTable(senstab)
# Plot errorbars
# First, make a dataset that works. Instead of using melt, cast and reshape esoterica, simply rbind the constituent sets.
names(usedT)[2:3]<-names(eqd)[2:3]<-names(noGaza9)[2:3]<-names(noTrim)[2:3]<-names(PCBSPop)[2:3]<-names(NoHH)[2:3]<-names(HH2017)[2:3]<-c("Estimate", "SE")
usedT$Type<-eqd$Type<-noGaza9$Type<-noTrim$Type<-PCBSPop$Type<-NoHH$Type<-HH2017$Type<-c(rep("Violent deaths",6),rep("Nonviolent deaths",2),rep("Missing persons",6))
longsensetab<-rbind(usedT,noGaza9,HH2017,noTrim,PCBSPop,NoHH,eqd)
longsensetab
dodge<-position_dodge(width=-0.9) # creates warning in ggplot, but orders legend
gplotdata<-longsensetab %>%filter(!grepl("issing",Categories)) %>%
filter(!grepl(", and",Categories))
sensitivplot<-ggplot(gplotdata,aes(x=paste(Type,Categories,sep=" "),y=Estimate,group=Raking,color=Raking ))+
geom_pointrange(aes(ymin=Estimate-1.96*SE,ymax=Estimate+1.96*SE),position=dodge)+
xlab("")+ylab("Persons (point estimate and lower and upper confidence bounds)")+
coord_flip()+
theme_light()+ theme(legend.position="bottom")+guides(color=guide_legend(ncol=3))
sensitivplot
pdf("Sensitivityplot.pdf", width=7.5,height=7,paper="letter")
print(sensitivplot)
dev.off()