estimates_version for revised Nature submission.R · Gaza-Mortality-Survey

library(tidyverse)
library(haven)
library(webshot2)
library(survey)
#library(expss)  
library(gt)    # must be loaded after expss otherwise tables crash
# setwd("E:\\Dropbox\\Projects\\Gaza\\Data\\Final 1"    #This does not work when using markdown. Data must be in project directory

agebounds<-function(cuts){
  # create typical age labels from a cut string
  out<-character()
  for (i in 1:(length(cuts)-1)) {
    lower<-cuts[i]+1
    if ((is.infinite(lower)) || (lower<0)) lower<-0
    upper<-cuts[i+1]
    if (lower<10) strlower<-paste(" ",lower,sep="") else strlower<-paste(lower)
    if (i<length(cuts)-1) {out<-append(out,paste(strlower,"-",upper,sep=''))}
      else {out<-append(out,paste(lower,"+",sep=''))}
    }
  return(out)
}

toString<-function(x,str) {
  # Stupid function to put in labels in tables (alternative to using factors or strings the whole way)
  # Replaces numbers with corresponding strings
  max<-length(x)
  out<-vector()
  for (i in 1:max) { 
    value<-x[i]
    replacement<-str[value]
    out<-append(out,replacement)   
  }
  return(out)
}

saveTable<-function(table) {
  # Saves a gt table in two formats 
  fname<-deparse(match.call()$table)
  gtsave(table,paste(fname,".docx",sep=""))
  gtsave(table,paste(fname,".pdf",sep=""))
}

#### Load files #####
#Load the roster file and, for weighting, population data from 
#the US Census Bureau.  https://www.census.gov/data-tools/demo/idb/#/pop?dashboard_page=country&COUNTRY_YR_ANIM=2025&CCODE=XG&popPages=BYAGE&menu=popViz&show_countries=n&CCODE_SINGLE=XG&subnat_map_admin=ADM1&POP_YEARS=2022
maindata<-"GMS Household roster.sav"   # note this file has Khan Younis and Deir al Balah switched in order as in the questionnaire
birthsdata="GMS Births.sav"
s_govnames<-c("North Gaza"=1,"Gaza City"=2,"Deir al Balah"=3,"Khan Younis"=4,"Rafah"=5) 
fates1 <- 
  haven::read_sav(maindata)
# recode governorates to reduce my confusion about order
fates1<-fates1 %>% mutate(HR00=case_when(HR00==1~1,
                                      HR00==2~2,
                                      HR00==3~4,
                                      HR00==4~3,
                                      HR00==5~5))
#val_lab(fates1$HR00)<-s_govnames

# Births
births<- 
  haven::read_sav(birthsdata)
# recode governorates to reduce my confusion about order
births<-births %>% mutate(HR00=case_when(HR00==1~1,
                                       HR00==2~2,
                                       HR00==3~4,
                                       HR00==4~3,
                                       HR00==5~5))
#val_lab(births$HR00)<-s_govnames
numbirths=nrow(births)
numbirths
birthsex<-table(births$HR04)

Population <- readxl::read_excel("Population Gaza Single year age groups IDB_2023.xlsx")
# From original IDB download: Changed 100+ to 100, changed variable names
View(Population)
# Calculate adjustment to population at risk
# Needs to be done on all marginals
USCBPop23<-Population$Total[1]     # Total population
Persons.in.roster.file<-nrow(fates1)
AverageExpansion<-USCBPop23/Persons.in.roster.file
AverageExpansion
birthsex<-round(birthsex*AverageExpansion)
TotalAtRisk<-USCBPop23+sum(birthsex)     #using estimated births, rather than projected (due to uncertainty in projected due to conflict)
USCBPopAtRisk<-TotalAtRisk
USCBScaleFactor<-USCBPopAtRisk/USCBPop23
USCBScaleFactor
# The births will be distributed to boys and girls at 0 age.
Population$Total[1]<-TotalAtRisk
Population$Male[1]<-birthsex[1]+Population$Male[1]
Population$Female[2]<-birthsex[2]+Population$Female[2]
Population$Male[2]<-birthsex[1]+Population$Male[2]
Population$Female[1]<-birthsex[2]+Population$Female[1]
#Calculate weights for each person in the roster file
#(now called "fates") so that the percentages in the demographic
#categories listed below match across fates and the estimates of
#the Census Bureau, adjusted for new births
agebreaks<-c(-1, 4, 11, 17,29, 39, 64, Inf)
agestr<-agebounds(agebreaks)
Population$AgeGroups<-as.integer(cut(as.numeric(Population$GROUP), breaks = agebreaks))   #Note change to 0 to 4
AgeGroups<-as.data.frame(group_by(Population,AgeGroups) %>% summarise(Males=sum(Male),
                                               Females=sum(Female)))

AgeGroups<-AgeGroups %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                .default=agestr[AgeGroups]),
                                AgeGroups=case_when(is.na(AgeGroups)~100,
                                                    .default=AgeGroups))
AgeGroups   

AgeGroups$PctFem<-AgeGroups$Females/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
AgeGroups$PctMal<-AgeGroups$Males/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100

IDBAgeTable <- AgeGroups %>% select(AgeGr,Males,PctMal, Females,PctFem)%>% gt()%>%
  tab_header(
    title = "Gaza Strip population at risk 2023, broad age groups",
    subtitle = "based on the International Data Base, U..S Census Bureau"
      ) %>%   tab_source_note(
    source_note = "Percentages are based on the total population"
    ) %>% tab_source_note(
      source_note = "Births during survey period added"
    )  %>%   tab_source_note(
  source_note = "Source: https://www.census.gov/ International Programs Main/ Data /International Database."
  ) %>% 
  tab_source_note(
    source_note = "Downloaded 25.01.2025" 
  ) %>% 
  cols_label(
   AgeGr="Age", PctMal="Male percent", PctFem="Female percent" 
  ) %>% 
  fmt_number(n_sigfig = 3) %>%
  fmt_number(
    columns=c(PctMal,PctFem),
    decimals=1
  )
  print(IDBAgeTable)
  saveTable(IDBAgeTable)
  #### Marginals ####
  # Construct marginals
  IDBGenderMarginal<-data.frame(HR04=c(1,2), Freq=c(AgeGroups$Males[nrow(AgeGroups)],AgeGroups$Females[nrow(AgeGroups)]))  # IDB Gender 
  IDBGenderMarginal
  IDBTotalAge<-AgeGroups$Males[-nrow(AgeGroups)]+ AgeGroups$Females[-nrow(AgeGroups)]
  IDBTotalAgeMarginal<-data.frame(AgeGroups=seq(1,length(IDBTotalAge)),Freq=IDBTotalAge)
  IDBTotalAgeMarginal
  sum(IDBTotalAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
  # Also construct a marginal that is a blend of age and gender - to get that exact
  IDBMales<-data.frame(AgeSex=paste(agestr,"Male"),Freq=AgeGroups$Males[-nrow(AgeGroups)])
  IDBFemales<-data.frame(AgeSex=paste(agestr,"Female"),Freq=AgeGroups$Females[-nrow(AgeGroups)])
  IDBSeparateSexesAndAgeMarginal<-bind_rows(IDBMales,IDBFemales)
  IDBSeparateSexesAndAgeMarginal
  sum(IDBSeparateSexesAndAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
#### Age gender table ####
  # Age Gender table from roster file

fates<-rbind(fates1,births)  
fates$AgeGroups<-as.integer(cut(as.numeric(fates$HR03), breaks = agebreaks))
fates<-fates %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                     .default=agestr[AgeGroups]),
                                        Gender=case_when(HR04==1~'Male',
                                        HR04==2~'Female'))
fates$AgeSex<-paste(fates$AgeGr,fates$Gender,sep=" ")

RosterAge<-fates %>% group_by(AgeGr,Gender)  %>% summarize(sumpop=n())
RosterAge<-reshape(as.data.frame(RosterAge),idvar="AgeGr", timevar="Gender",direction="wide")
RosterAge
names(RosterAge)<-c('AgeGr','FemaleRoster','MaleRoster')
RosterAge<-rbind(RosterAge,data.frame(AgeGr='Total',FemaleRoster=sum(RosterAge$FemaleRoster),MaleRoster=sum(RosterAge$MaleRoster)))
RosterAge$PctFemRoster<-RosterAge$FemaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge$PctMalRoster<-RosterAge$MaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge
SurveyAgeTable <- RosterAge %>% select(AgeGr,MaleRoster,PctMalRoster, FemaleRoster,PctFemRoster) %>% 
  gt()%>%
  tab_header(
    title = "Household members October 6, 2023 and births in survey period by broad age groups" ) %>% 
      tab_source_note(source_note = "Unweighted. Percentages are based on the total")  %>% 
      tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_label(
    AgeGr="Age",
    MaleRoster="Male",PctMalRoster="Percent male", FemaleRoster="Females",PctFemRoster="Percent female"
  ) %>% 
  fmt_number(
    columns=c(PctMalRoster,PctFemRoster),
    decimals=1
  )

print(SurveyAgeTable)
saveTable(SurveyAgeTable)

#### Governorate marginals ####
# Construct governorate marginals 
# Since the adjusted weights also should reflect what we think is reality, not only the sampling
# we use governorate marginals adjusted to the IDB Census Bureau population.
#Governorate  	PCBS 2023	IDB adjusted	Questionnaire code	Logical code	PSUs
#North Gaza   	444412	418833	        1	                    1	           36
#Gaza City	    749100	705983	        2	                    2	           72
#Deir al Balah	319208	300835	        4	                    3	           28
#Khan Younis	  438557	413315	        3	                    4	           42
#Rafah	        275267	259423	        5	                    5	           22
# Note that the scale factor for new births is applied

governoratemarginals<-data.frame(HR00=seq(1,5,1), Freq=round(c( 418833,705983, 300835,413315, 259423)*USCBScaleFactor ))
#val_lab(governoratemarginals$HR00)<-s_govnames
governoratemarginals                                    

# Note that governorate marginals are used on governorate of origin, not of interview.

# Construct household size marginals
# Note that we have to scale to current population and since we are using the roster file
# we have to use marginals that are population in each size group
householdsize2017 <-c(10661,31474,34084,43481,49305, 50239,42741,30693,19041,(10547 + 12444))
hhsizes<-c(seq(1,9,1),11.43151668)
popbyhouseholdsize2017<-householdsize2017*hhsizes
scalefactor2017to2023<-sum(IDBGenderMarginal$Freq)/sum(popbyhouseholdsize2017)
householdsizemarginals2017<-popbyhouseholdsize2017*scalefactor2017to2023
householdsizemarginals2017<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginals2017)
householdsizemarginals2017
# Here from MICS 2019-2020 household file (Downloaded from https://mics.unicef.org/surveys)
#the initial data are persons in each household size group
hhsizemarginalsMICS<-c(110,
                          501,
                          1069,
                          1841,
                          2458,
                          3218,
                          3289,
                          2255,
                          1816,
                          2502)  # From 1 to 10+
MICSScalefactor<-sum(IDBGenderMarginal$Freq)/sum(hhsizemarginalsMICS)
householdsizemarginalsMICS<-hhsizemarginalsMICS*MICSScalefactor

householdsizemarginalsMICS<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginalsMICS)
householdsizemarginalsMICS

# Difference between distributions based on 2017 and MICS
round(householdsizemarginals2017$Freq-householdsizemarginalsMICS$Freq)
sum(householdsizemarginals2017$Freq)-sum(householdsizemarginalsMICS$Freq)    # Should be 0

# get household sizes into the data file
fates$onedummy<-1   # simple counting variable that is also used initially for weights
# fates$Total<-1
pop23<-AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total']
fates$simpleExpand<-pop23/nrow(fates)
hsiz=aggregate(fates$onedummy,list(fates$prikey),sum)
names(hsiz)<-c("prikey","hsize")
fates<-left_join(fates,hsiz,by="prikey")
fates$hsize10<-ifelse(fates$hsize<10,fates$hsize,10)
##### Construct survey design ####
#Construct strata, weights. Also add variables for sample margins, and survey
#design
# Constructed total marginals are:
# AgeSexmatches IDBSeparateSexesAndAgeMarginal 
# AgeGroups matches IDBTotalAgeMarginal
# HR04 matches IDBGenderMarginal
# AgeSex matches IDBSeparateSexesAndAgeMarginal
# hsize10 matches householdsizemarginalsMICS
# hsize10 matches householdsizemarginals2017

# Some useful strings 
s_mf<-c("Male", "Female")
s_status<-c("Resident","Left the Gaza Strip","Elsewhere in the Gaza Strip","Dead","Missing","Imprisoned" )
s_cause_dead_full<-c("Disease etc","Disease etc, no aid","Accident","Violent", "Not known")
s_Cause_dead_short<-c("Total violent deaths","Total nonviolent deaths")
s_governorates<-c("Northern Gaza","Gaza city","Deir al-Balah","Khan Younis","Rafah")
s_maincat<-c("Children (<18)","Women (18-64)","Elderly (65+)","Men (18-64)")
s_WEC<-"Women, elderly, and children"

#### Category coding ####
# categories for reporting - should be done before design definitions since design incorporates data
fates<- fates %>% mutate(Categories=case_when(HR07< 18 ~ 1,
                                              HR04==2 & HR07>17 & HR07<65 ~ 2,
                                              HR07>64 ~3,
                                              HR04==1 & HR07>17 & HR07<65 ~ 4),
                         WEC=case_when(is.na(HR07)~0,
                                             HR04==2 | (HR07<18 | HR07>64)~1,
                                             HR04==1 & (HR07>17 & HR07<65)~0),
                         WECmiss=case_when(HR04==2 | (HR03<18 | HR03>64)~1,
                                             HR04==1 & (HR03>17 & HR03<65)~0),
                         Allmiss=if_else(HR05==5,1,0),
                         CatMiss=case_when(HR03< 18 ~ 1,
                                              HR04==2 & HR03>17 & HR03<65 ~ 2,
                                              HR03>64 ~3,
                                              HR04==1 & HR03>17 & HR03<65 ~ 4),
                         typedeath=case_when(HR06==4~1,
                                             HR06<4 ~2),
                         Dead=if_else(HR05==4,1,0),
                         Violent=if_else(HR05==4& HR06==4,1,0),
                         Nonviolent=if_else(HR05==4 & HR06<4,1,0),
                         Missing=if_else(HR05==5,1,0))
fates<- fates %>% mutate(ratedenominator = case_when(HR01>50 & (HR05==4 | HR05==2)~0.25,
                                                     HR01>5~0.5,
                                                     HR05==1~1,
                                                     HR05==2~0.5,
                                                     HR05==3~1,
                                                     HR05==4~0.5,
                                                     HR05==5~1,
                                                     HR05==6~1))


fates$stratum<-fates$HR00*10+fates$PSUType
uwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy)    # uw prefix means unweighted
tbs<-svytable(~HR05+HR04,uwdesign)
ftable(tbs)

#  The following should match (simple unweighted table)
xtabs(~HR05+HR04,fates)
# 
# Full raked design
rakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
svytable(~HR04+HR05,rakedesign)
svyby(~onedummy,~HR04,design=rakedesign,svytotal)
#
# Check weights
#
weights<-weights(rakedesign)
boxplot(weights)
summary(weights)
# This is a bit high variation
# so let us fix that
trimmedrakedesign<-trimWeights(rakedesign, upper=median(weights*2), lower=90)
weights<-weights(trimmedrakedesign)
boxplot(weights)
summary(weights)

# Export data file
GMS_Used_data<-model.frame(trimmedrakedesign)
GMS_Used_data<-cbind(GMS_Used_data,weights)
saveRDS(GMS_Used_data,file="GMS_at_risk_apr_18_2025.rds")
rm(GMS_Used_data)

#### Main table ####
# Master table - trimmed rakeddesign
currentdesign<-trimmedrakedesign

tbl<-round(svyby(~onedummy,~HR05+~HR04,design=currentdesign,svytotal,vartype=c("se","ci")))
tbl
tbl$HR05<-toString(tbl$HR05,s_status)
tbl$HR04<-toString(tbl$HR04,s_mf)
names(tbl)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tbl
tblT<-round(svyby(~onedummy,~HR05,design=currentdesign,svytotal,vartype=c("se","ci")))

tblT$HR05<-toString(tblT$HR05,s_status)
tblT$Gender<-rep("Total",length(s_status))
tblT<-tblT[,c("HR05","Gender","onedummy","se","ci_l","ci_u")]
tblT
names(tblT)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tblT
tblT<-bind_rows(tbl,tblT)
mainoutcomestable<-tblT%>% 
  group_by(Gender) %>%
  gt(row_group_as_column = TRUE) %>%
  tab_header(
    title = "Gaza Strip Survey main status outcomes") %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size") %>%
  tab_source_note(source_note = paste("Based on Gaza Mortality Survey ",maindata))  %>% 
    cols_merge(
       columns=c(ci_l,ci_u),
       pattern="({1}, {2})")%>%
    cols_label(se~"Standard<br>error",
               ci_l~"Confidence<br>Interval",
               .fn = md) %>% 
     fmt_number(n_sigfig = 3)
mainoutcomestable
saveTable(mainoutcomestable)


# main table of deaths
# Baseline deaths are IDB estimated deaths 2023 6348, 2024 6188,2025 6043  87 days in 2023
#https://www.census.gov/data-tools/demo/idb/#/table?dashboard_page=sources&COUNTRY_YR_ANIM=2023&CCODE_SINGLE=XG&subnat_map_admin=ADM1&CCODE=XG&COUNTRY_YEAR=2023&menu=tableViz&TABLE_YEARS=2023,2024,2025&TABLE_USE_RANGE=N&TABLE_USE_YEARS=Y&TABLE_STEP=1&TABLE_ADD_YEARS=2023,2024,2025&quickReports=CUSTOM&CUSTOM_COLS=CDR,DEATHS
# Downloaded 8.2.2025
# Median day of fieldwork is Jan 2, 2025
end <- as.Date("2023-12-31")
start <- strptime("2023-10-06", format="%Y-%m-%d", tz="UTC")
DaysIn2023<-as.numeric(difftime(as.POSIXct(end), as.POSIXct(start, tz="UTC"), units="days"))
DaysIn2025=2
baseline<-6348*DaysIn2023/365+6188+6043*DaysIn2025/365    # might be considered slightly misplaced concreteness
# Define a function to make it easier to repeat
#### Makemaintable function ####
makemaintable<-function(currentdesign,designdescription) {

    #First select violent deaths - HR06==4
    tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
    tb1$Categories<-toString(tb1$Categories,s_maincat)
    tbwec<- svyby(~onedummy,~WEC,design=subset(currentdesign,HR06==4 & WEC==1),svytotal,vartype=c("se","ci"))
    names(tbwec)[1]<-"Categories"  
    tbwec$Categories<-s_WEC
    tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
    names(tb2)[1]<-"Categories"
    tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
    tb2
    
    tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
    tb3<-data.frame(Categories=paste(s_WEC," (%)"),onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    tb3
    tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
    names(tb4)[1]<-"Categories"
    tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
    tb4$Categories<-toString(tb4$Categories,s_maincat)
    tb4
    tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
    names(tb5)[1]<-"Categories"
    tb5[1]<-"Total missing persons"
    tb5
    tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
    tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    names(tb6)[1]<-"Categories"
    tb6
    
    # make a collected frame for the table
    tb1$type<-"Violent deaths"
    tb2$type<-"Violent deaths"
    tb2$type[2]<-"Nonviolent deaths"
    tb3$type<-"Violent deaths"
    tb4$type<-"Missing persons"
    tb5$type<-"Missing persons"
    tb6$type<-"Missing persons"
    tbwec$type<-"Violent deaths"
    #
    MF<-rbind(tb1,tbwec,tb2[1,],tb3)
    
    eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
    MF<-rbind(MF,tb2[2,],eframe)
    
    MF<-rbind(MF,tb4, tb5,tb6)
    MF
    row_number(MF)
    return(MF%>% select(type,Categories,onedummy,se,ci_l,ci_u) %>%
      group_by(type) %>%
      gt(row_group_as_column = TRUE) %>%
      fmt_number(n_sigfig = 3)%>%
      tab_header(
        title = md("Estimates of violent, nonviolent mortality and missing persons<br>by broad age and gender groups")) %>%
        tab_source_note(source_note =designdescription ) %>%
        tab_source_note(source_note = paste("Based on ",maindata))  %>% 
      cols_merge(
        columns=c(ci_l,ci_u),
        pattern="({1}, {2})")%>%
      cols_label(se~"Standard<br>error",
                 ci_l~"Confidence<br>Interval",
                 onedummy="Estimate",
                 .fn = md)
      )

}

trimraketable<-makemaintable(trimmedrakedesign,"Raked to age,gender, governorate, and household size")
print(trimraketable)
saveTable(trimraketable)

# Make a table of deaths per governorate
tb1<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))  # all deaths by governorate
tb1$type<-"Total"
names(tb1)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb1$Governorate<-toString(tb1$Governorate,s_governorates)
tb1
tb2<-round(ftable(svytotal(~onedummy,design=subset(currentdesign,HR05==4,vartype=c("se","ci"))) ))     #total deaths, all governorates
tb2ci<-ftable(round(confint(svytotal(~onedummy,design=subset(currentdesign,HR05==4) ))))
tb2<-data.frame(Governorate="Total",Estimate=tb2[1,1],se=tb2[1,2],ci_l=tb2ci[1,1],ci_l=tb2ci[1,2],type="Total")
names(tb2)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb2[1,1]<-"Total"
tb2
tb3<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # Total violent / nonviolent
tb3$type=c("Violent deaths","Nonviolent deaths")
names(tb3)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb3$Governorate<-"Total"
tb3
tb4<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==1),svytotal,vartype=c("se","ci")))   # violent 
tb4$type<-"Violent deaths"
names(tb4)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb4$Governorate<-toString(tb4$Governorate,s_governorates)
tb4
tb5<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==2),svytotal,vartype=c("se","ci")))   # Nonviolent 
tb5$type<-"Nonviolent deaths"
names(tb5)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb5$Governorate<-toString(tb5$Governorate,s_governorates)
tb5
# collect t he governorate table
govtab<-rbind(tb1,tb2,tb4,tb3[1,],tb5,tb3[2,])
governoratetable<-govtab %>% select(type,Governorate,Estimate,se,ci_l,ci_u) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Estimates of violent, nonviolent mortality<br>by governorate of residence on October 6, 2023")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_merge(
    columns=c(ci_l,ci_u),
    pattern="({1}, {2})")%>%
  cols_label(se~"Standard<br>error",
             ci_l~"Confidence<br>Interval",
             .fn = md)
print(governoratetable)
saveTable(governoratetable)

#### Crude Death Rates #####
# calculate crude death rate, based on IDPpop
# Use numerators and denominator as defined in "category coding" above

ViolentCDR<-svyratio(~Violent,~ratedenominator,design=currentdesign)
NonviolentCDR<-svyratio(~Nonviolent,~ratedenominator,design=currentdesign)
TotalCDR<-svyratio(~Dead,~ratedenominator,design=currentdesign)
ViolentCDRCI<-confint(svyratio(~Violent,~ratedenominator,design=currentdesign))
NonviolentCDRCI<-confint(svyratio(~Nonviolent,~ratedenominator,design=currentdesign))
TotalCDRCI<-confint(svyratio(~Dead,~ratedenominator,design=currentdesign))



tabRatesA<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000*12/15,
                    SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000*12/5,
                    l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000*12/15,
                    u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000*12/15)

AnnualizedCDRTable<-tabRatesA %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Annualized mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
  cols_merge(
    columns=c(l_ci,u_ci),
    pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)
print(AnnualizedCDRTable)
saveTable(AnnualizedCDRTable)
#
tabRates<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000,
                     SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000,
                     l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000,
                     u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000)
CDRTable<-tabRates %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
  columns=c(l_ci,u_ci),
  pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)

print(CDRTable)
saveTable(CDRTable)



# Sensitivity
makemaintableDF<-function(currentdesign) {
  
  #First select violent deaths - HR06==4
  tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
  tb1$Categories<-toString(tb1$Categories,s_maincat)
  tb1    
  tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
  names(tb2)[1]<-"Categories"
  tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
  tb2
  tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
  tb3<-data.frame(Categories=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  tb3
  tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
  names(tb4)[1]<-"Categories"
  tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
  tb4$Categories<-toString(tb4$Categories,s_maincat)
  tb4
  tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
  names(tb5)[1]<-"Categories"
  tb5[1]<-"Total missing persons"
  tb5
  tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
  tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  names(tb6)[1]<-"Categories"
  tb6
  
  # make a collected frame for the table
  tb1$type<-"Violent deaths"
  tb2$type<-"Violent deaths"
  tb2$type[2]<-"Nonviolent deaths"
  tb3$type<-"Violent deaths"
  tb4$type<-"Missing persons"
  tb5$type<-"Missing persons"
  tb6$type<-"Missing persons"
  
  #Groupframe<-data.frame(Categories="Violent deaths", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(tb1,tb2[1,],tb3)
  #Groupframe<-data.frame(Categories="Nonviolent deaths", onedummy="",se="",ci_l="",ci_u="")
  eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
  MF<-rbind(MF,tb2[2,],eframe)
  #Groupframe<-data.frame(Categories="Missing persons", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(MF,tb4, tb5,tb6)
  return(MF)
}
usedT<-cbind(makemaintableDF(trimmedrakedesign)[,1:3],data.frame(Raking=rep("1. Main model, full rake, trimmed weights",14)))
names(usedT)<-c("Categories","UsedE","UsedSE","Raking")
eqwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~simpleExpand)
eqd<-cbind(makemaintableDF(eqwdesign)[,1:3],data.frame(Raking=rep("7. Unadjusted equal probability",14)))
names(eqd)<-c("Categories","EQE","EQSE","Raking")
eqd
# Make no Gaza 9 team design. The idea is to make it as if gaza9 never was part of the sample, i.e., not subsetting and retaining sample structure.
# It is debatable what is best.But the choice only affects standard errors.
nog9fates<-subset(fates,Interviewer != "gaza9")
nog9design<-rake(svydesign(data=nog9fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
noGaza9<-cbind(makemaintableDF(nog9design)[,1:3],data.frame(Raking=rep("2. Team 9 excluded",14)))   
names(noGaza9)<-c("Categories","noGE","noGSE","Raking")
# raked, no trim
noTrim<-cbind(makemaintableDF(rakedesign)[,1:3],data.frame(Raking=rep("4. Full rake, no weight trim",14)))
names(noTrim)<-c("Categories","noTrimE","noTrimSE", "Raking")
# No household adjustment
NoHHdesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, governoratemarginals))
NoHH<-cbind(makemaintableDF(NoHHdesign)[,1:3],data.frame(Raking=rep("6. No household size adjustment",14)))
names(NoHH)<-c("Categories","NOHHE","NOHH_SE","Raking")
NoHH
#
# Household adjustment using 2017 HHsizes
HHPDesign<-rake(svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), 
                population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginals2017,governoratemarginals))
HH2017<-cbind(makemaintableDF(HHPDesign)[,1:3],data.frame(Raking=rep("3. Census 2017 household size adjustment",14)))   
names(HH2017)<-c("Categories","HH2017_E","HH2017_SE","Raking")

# Use PCBS Population
PCBSPop23<-2226544+sum(birthsex)
PCBSAdjust=PCBSPop23/USCBPopAtRisk
# Adjust the marginals
PCBSTotalAgeMarginal<-IDBTotalAgeMarginal %>% mutate(Freq=Freq*PCBSAdjust)

PCBSGenderMarginal<-IDBGenderMarginal%>% mutate(Freq=Freq*PCBSAdjust)
PCBShouseholdsizemarginalsMICS<-householdsizemarginalsMICS%>% mutate(Freq=Freq*PCBSAdjust)
PCBSgovernoratemarginal<-governoratemarginals%>% mutate(Freq=Freq*PCBSAdjust)
# Full raked PCBS design
PCBSrakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(PCBSTotalAgeMarginal, PCBSGenderMarginal, PCBShouseholdsizemarginalsMICS,PCBSgovernoratemarginal))
PCBSPop<-cbind(makemaintableDF(PCBSrakedesign)[,1:3],data.frame(Raking=rep("5. PCBS population, full rake",14)))
names(PCBSPop)<-c("Categories","PCBSE","PCBS_SE","Raking")

# bind together
senstableDF<-cbind(usedT[1:3],eqd[,2:3],noGaza9[,2:3],noTrim[,2:3],NoHH[2:3],HH2017[2:3],PCBSPop[,2:3])   # Omit raking
senstableDF$type<-c(rep("Violent deaths",5),"Nonviolent deaths %",rep("Nonviolent deaths",2),rep("Missing persons",5),"Missing persons %")
senstableDF
senstab<-senstableDF %>% select(Categories,UsedE, UsedSE,noGE,noGSE,HH2017_E,HH2017_SE,noTrimE,noTrimSE,PCBSE,PCBS_SE,NOHHE,NOHH_SE,EQE,EQSE,type) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  opt_table_font(size=9) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = "Sensitivy of estimates to model assumptions") %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  tab_spanner(
    label = md("<br>Main model"),
    columns = c(UsedE,UsedSE)
  ) %>% 
  tab_spanner(
    label = md("Unadjusted<br>equal probability"),
    columns = c(EQE,EQSE)) %>% 
          tab_spanner(
            label = md("<br>Team 9 excluded"),
            columns = c(noGE,noGSE)) %>%
  tab_spanner(
    label = md("Full rake,<br>no weight trim"),
    columns = c(noTrimE,noTrimSE)) %>%
  tab_spanner(
    label = md("Census 2017<br>household size adjustment"),
    columns = c(HH2017_E,HH2017_SE)) %>%
  tab_spanner(
    label = md("No houshold<br>size adjustment"),
    columns = c(NOHHE,NOHH_SE)) %>%
  tab_spanner(
    label = md("PCBS population<br>Full rake"),
    columns = c(PCBSE,PCBS_SE)) %>%
  cols_label(UsedE="Estimate",
             UsedSE~"SE",
             EQE="Estimate",
             EQSE~"SE",
             noGE="Estimate",
             noGSE~"SE",
             noTrimE="Estimate",
             noTrimSE~"SE",
             NOHHE="Estimate",
             NOHH_SE~"SE",
             PCBSE="Estimate",
             PCBS_SE="SE",
             HH2017_E="Estimate",HH2017_SE="SE",
             .fn = md) %>%
  tab_footnote(
    locations = cells_column_labels(c(UsedSE)),
    footnote = "SE: Standard error")

senstab
saveTable(senstab)

# Plot errorbars 
# First, make a dataset that works. Instead of using melt, cast and reshape esoterica, simply rbind the constituent sets.
names(usedT)[2:3]<-names(eqd)[2:3]<-names(noGaza9)[2:3]<-names(noTrim)[2:3]<-names(PCBSPop)[2:3]<-names(NoHH)[2:3]<-names(HH2017)[2:3]<-c("Estimate", "SE")
usedT$Type<-eqd$Type<-noGaza9$Type<-noTrim$Type<-PCBSPop$Type<-NoHH$Type<-HH2017$Type<-c(rep("Violent deaths",6),rep("Nonviolent deaths",2),rep("Missing persons",6))
longsensetab<-rbind(usedT,noGaza9,HH2017,noTrim,PCBSPop,NoHH,eqd)
longsensetab
dodge<-position_dodge(width=-0.9)   # creates warning in ggplot, but orders legend
gplotdata<-longsensetab %>%filter(!grepl("issing",Categories)) %>%
                         filter(!grepl(", and",Categories))
sensitivplot<-ggplot(gplotdata,aes(x=paste(Type,Categories,sep=" "),y=Estimate,group=Raking,color=Raking ))+
  geom_pointrange(aes(ymin=Estimate-1.96*SE,ymax=Estimate+1.96*SE),position=dodge)+
  xlab("")+ylab("Persons (point estimate and lower and upper confidence bounds)")+
  coord_flip()+
  theme_light()+ theme(legend.position="bottom")+guides(color=guide_legend(ncol=3))
  
sensitivplot
pdf("Sensitivityplot.pdf", width=7.5,height=7,paper="letter")
print(sensitivplot)
dev.off()