Gaza Strip Mortality version for revised nature submission.rmd · Gaza-Mortality-Survey

---
title: "The Gaza Strip Mortality Survey"
author: "Jon Pedersen/Mike Spagat/Gaza Strip mortality survey project group"
date: "`r Sys.Date()`"
output: html_document
---
Change log April 22, 2025:

* Changed to new household roster input file, and adapted governorate order to fit new file
* Added births during the survey period as part of the population at risk
* Changed code for death rates, with confidence intervals (now as a ratio estimate directly from data file insted of using aggregates)
* Added sensitivity check for PCBS population size, variants of household size assumptions
* Miscellaneous changes because of new variable names, wrong headings, etc
* Added total number of women, elderly and children to main table


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


# Introduction
Library loading and some utility functions
```{r loadlib, warning=FALSE}
library(tidyverse)
library(haven)
library(webshot2)
library(survey)
#library(expss)
library(gt)    # must be loaded after expss otherwise tables crash

agebounds<-function(cuts){
  # create typical age labels from a cut string
  out<-character()
  for (i in 1:(length(cuts)-1)) {
    lower<-cuts[i]+1
    if ((is.infinite(lower)) || (lower<0)) lower<-0
    upper<-cuts[i+1]
    if (lower<10) strlower<-paste(" ",lower,sep="") else strlower<-paste(lower)
    if (i<length(cuts)-1) {out<-append(out,paste(strlower,"-",upper,sep=''))}
     else {out<-append(out,paste(lower,"+",sep=''))}
  }
  return(out)
}

toString<-function(x,str) {
  # Stupid function to put in labels in tables (alternative to using factors or strings the whole way)
  # Replaces numbers with corresponding strings
  max<-length(x)
  out<-vector()
  for (i in 1:max) { 
    value<-x[i]
    replacement<-str[value]
    out<-append(out,replacement)   
  }
  return(out)
}

saveTable<-function(table) {
  # Saves a gt table in two formats - word and pdf
  fname<-deparse(match.call()$table)
  gtsave(table,paste(fname,".docx",sep=""))
  gtsave(table,paste(fname,".pdf",sep=""))
}
```
Output R version 
```{R version}
R.version.string
```
## Age groups defintions
If one wants to have different age groups, this is the place to change - not in the actual age group/tabulation code
The agebreaks are suitable to the cut function, and the agstr contains label strings for the age groups
```{r definitions}
agebreaks<-c(-1, 4, 11, 17,29, 39, 64, Inf)
agestr<-agebounds(agebreaks)
```
#Load data files 
<br>Load the roster file, the births file and the IDB population file\
Note that since knitr is used, it is best to put the data in the default directory  
In original IDB download: Changed 100+ to 100, changed variable names (done in excel)\
https://www.census.gov/data-tools/demo/idb/#/pop?dashboard_page=country&COUNTRY_YR_ANIM=2025&CCODE=XG&popPages=BYAGE&menu=popViz&show_countries=n&CCODE_SINGLE=XG&subnat_map_admin=ADM1&POP_YEARS=2023,2024
```{r read data}
maindata<-"GMS Household roster.sav"   # note this file has Khan Younis and Deir al Balah switched in order as in the questionnaire
birthsdata="GMS Births.sav"
s_govnames<-c("North Gaza"=1,"Gaza City"=2,"Deir al Balah"=3,"Khan Younis"=4,"Rafah"=5) 
fates1 <- 
  haven::read_sav(maindata)
# recode governorates to reduce my confusion about order
fates1<-fates1 %>% mutate(HR00=case_when(HR00==1~1,
                                      HR00==2~2,
                                      HR00==3~4,
                                      HR00==4~3,
                                      HR00==5~5))
#val_lab(fates1$HR00)<-s_govnames

# Births
births<- 
  haven::read_sav(birthsdata)
# recode governorates to reduce my confusion about order
births<-births %>% mutate(HR00=case_when(HR00==1~1,
                                       HR00==2~2,
                                       HR00==3~4,
                                       HR00==4~3,
                                       HR00==5~5))
#val_lab(births$HR00)<-s_govnames
numbirths=nrow(births)
numbirths
birthsex<-table(births$HR04)

Population <- readxl::read_excel("Population Gaza Single year age groups IDB_2023.xlsx")
# From original IDB download: Changed 100+ to 100, changed variable names
# View(Population)
# Calculate adjustment to population at risk
# Needs to be done on all marginals
USCBPop23<-Population$Total[1]     # Total population
Persons.in.roster.file<-nrow(fates1)
AverageExpansion<-USCBPop23/Persons.in.roster.file
AverageExpansion
birthsex<-round(birthsex*AverageExpansion)
TotalAtRisk<-USCBPop23+sum(birthsex)     #using estimated births, rather than projected (due to uncertainty in projected due to conflict)
USCBPopAtRisk<-TotalAtRisk
USCBScaleFactor<-USCBPopAtRisk/USCBPop23
USCBScaleFactor
# The births will be distributed to boys and girls at 0 age.
Population$Total[1]<-TotalAtRisk
Population$Male[1]<-birthsex[1]+Population$Male[1]
Population$Female[2]<-birthsex[2]+Population$Female[2]
Population$Male[2]<-birthsex[1]+Population$Male[2]
Population$Female[1]<-birthsex[2]+Population$Female[1]
```
# Age distributions
## Census Bureau age distribution
Note that raking marginals for age and gender are also produced here.\
Two sets of age/gender marginals. One is the traditional with separate age and gender, the other collects age and gender in one variable. The latter will force age and gender to match IDB exactly, but at the cost of bigger weight variation than with separate age and gender.
```{r censusage}
Population$AgeGroups<-as.integer(cut(as.numeric(Population$GROUP), breaks = agebreaks))   #Note change to 0 to 4
AgeGroups<-as.data.frame(group_by(Population,AgeGroups) %>% summarise(Males=sum(Male),
                                               Females=sum(Female)))

AgeGroups<-AgeGroups %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                .default=agestr[AgeGroups]),
                                AgeGroups=case_when(is.na(AgeGroups)~100,
                                                    .default=AgeGroups))
AgeGroups   

AgeGroups$PctFem<-AgeGroups$Females/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
AgeGroups$PctMal<-AgeGroups$Males/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100

IDBAgeTable <- AgeGroups %>% select(AgeGr,Males,PctMal, Females,PctFem)%>% gt()%>%
  tab_header(
    title = "Gaza Strip population at risk 2023, broad age groups",
    subtitle = "based on the International Data Base, U..S Census Bureau"
      ) %>%   tab_source_note(
    source_note = "Percentages are based on the total population"
    ) %>% tab_source_note(
      source_note = "Births during survey period added"
    )  %>%   tab_source_note(
  source_note = "Source: https://www.census.gov/ International Programs Main/ Data /International Database."
  ) %>% 
  tab_source_note(
    source_note = "Downloaded 25.01.2025" 
  ) %>% 
  cols_label(
   AgeGr="Age", PctMal="Male percent", PctFem="Female percent" 
  ) %>% 
  fmt_number(n_sigfig = 3) %>%
  fmt_number(
    columns=c(PctMal,PctFem),
    decimals=1
  )

  IDBAgeTable
  saveTable(IDBAgeTable)
  #
  # Construct marginals based on the IDB table
  #
  IDBGenderMarginal<-data.frame(HR04=c(1,2), Freq=c(AgeGroups$Males[nrow(AgeGroups)],AgeGroups$Females[nrow(AgeGroups)]))  # IDB Gender 
  IDBGenderMarginal
  IDBTotalAge<-AgeGroups$Males[-nrow(AgeGroups)]+ AgeGroups$Females[-nrow(AgeGroups)]
  IDBTotalAgeMarginal<-data.frame(AgeGroups=seq(1,length(IDBTotalAge)),Freq=IDBTotalAge)
  IDBTotalAgeMarginal
  sum(IDBTotalAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
  # Also construct a marginal that is a blend of age and gender - to get that exact
  IDBMales<-data.frame(AgeSex=paste(agestr,"Male"),Freq=AgeGroups$Males[-nrow(AgeGroups)])
  IDBFemales<-data.frame(AgeSex=paste(agestr,"Female"),Freq=AgeGroups$Females[-nrow(AgeGroups)])
  IDBSeparateSexesAndAgeMarginal<-bind_rows(IDBMales,IDBFemales)
  IDBSeparateSexesAndAgeMarginal
  sum(IDBSeparateSexesAndAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
```
## Age distribution in the survey data file
```{r surveyage}
# add roster and birth together

fates<-rbind(fates1,births)  
fates$AgeGroups<-as.integer(cut(as.numeric(fates$HR03), breaks = agebreaks))
fates<-fates %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                     .default=agestr[AgeGroups]),
                                        Gender=case_when(HR04==1~'Male',
                                        HR04==2~'Female'))
fates$AgeSex<-paste(fates$AgeGr,fates$Gender,sep=" ")

RosterAge<-fates %>% group_by(AgeGr,Gender)  %>% summarize(sumpop=n())
RosterAge<-reshape(as.data.frame(RosterAge),idvar="AgeGr", timevar="Gender",direction="wide")
RosterAge
names(RosterAge)<-c('AgeGr','FemaleRoster','MaleRoster')
RosterAge<-rbind(RosterAge,data.frame(AgeGr='Total',FemaleRoster=sum(RosterAge$FemaleRoster),MaleRoster=sum(RosterAge$MaleRoster)))
RosterAge$PctFemRoster<-RosterAge$FemaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge$PctMalRoster<-RosterAge$MaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge
SurveyAgeTable <- RosterAge %>% select(AgeGr,MaleRoster,PctMalRoster, FemaleRoster,PctFemRoster) %>% 
  gt()%>%
  tab_header(
    title = "Household members October 6, 2023 and births in survey period by broad age groups" ) %>% 
      tab_source_note(source_note = "Unweighted. Percentages are based on the total")  %>% 
      tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_label(
    AgeGr="Age",
    MaleRoster="Male",PctMalRoster="Percent male", FemaleRoster="Females",PctFemRoster="Percent female"
  ) %>% 
  fmt_number(
    columns=c(PctMalRoster,PctFemRoster),
    decimals=1
  )


SurveyAgeTable
saveTable(SurveyAgeTable)
```
# Construct variables, data frames and vectors needed for survey design definition
## Construct governorate marginals 
Since the adjusted weights also should reflect what we think is reality, not only the sampling,
we use marginals adjusted to the IDB Census Bureau population.<br>

|Governorate  |PCBS 2023|IDB adjusted|	Questionnaire code	|North South code|	PSUs
|:------------|:-------:|:----------:|:--------------------:|:----------:|:----:|
|North Gaza   |	444412  |	418833	   |                     1|	  1        |	36  |
|Gaza City	  |  749100	| 705983	   |                     2|	          2|	72  | 
|Deir al Balah|	319208  |	300835	   |                     4|	         3 |   28|
|Khan Younis	| 438557	| 413315	   |                     3|	         4 |   42|
|Rafah	      |  275267 |	259423	   |                     5|	         5|    22|
<br>
The SPSS data file uses the questionnaire code, but that has been recoded into the north south code (above)  
Note that the scale factor to account for new births is applied.
```{r governorate marginals}
governoratemarginals<-data.frame(HR00=seq(1,5,1), Freq=c( 418833,705983, 300835,413315, 259423)*USCBScaleFactor )
governoratemarginals                                    
```

## Construct household size marginals
Note that we have to scale to current population and since we are using the roster file
we have to use marginals that are population in each size group
There are two versions using household sizes from the 2017 census, and from the 2019-2020 MICS survey
First the census version
```{r household sizemarginals census}
householdsize2017 <-c(10661,31474,34084,43481,49305, 50239,42741,30693,19041,(10547 + 12444))
hhsizes<-c(seq(1,9,1),11.43151668)
popbyhouseholdsize2017<-householdsize2017*hhsizes
scalefactor2017to2023<-sum(IDBGenderMarginal$Freq)/sum(popbyhouseholdsize2017)
householdsizemarginals2017<-popbyhouseholdsize2017*scalefactor2017to2023
householdsizemarginals2017<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginals2017)
householdsizemarginals2017
# Here from MICS 2019-2020 household file (Downloaded from https://mics.unicef.org/surveys)
#the initial data are persons in each household size group
hhsizemarginalsMICS<-c(110,
                          501,
                          1069,
                          1841,
                          2458,
                          3218,
                          3289,
                          2255,
                          1816,
                          2502)  # From 1 to 10+
MICSScalefactor<-sum(IDBGenderMarginal$Freq)/sum(hhsizemarginalsMICS)
householdsizemarginalsMICS<-hhsizemarginalsMICS*MICSScalefactor

householdsizemarginalsMICS<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginalsMICS)
householdsizemarginalsMICS

# Difference between distributions based on 2017 and MICS
round(householdsizemarginals2017$Freq-householdsizemarginalsMICS$Freq)
sum(householdsizemarginals2017$Freq)-sum(householdsizemarginalsMICS$Freq)    # Should be 0
```
## Get household sizes into the data file
* Two variables: 
    + hsize : the household size 
    + hsize10: the household size, but everything over 10 is set at 10. This is the one used to rake
```{r householdsizes}
fates$onedummy<-1   # simple counting variable that is also used initially for weights
pop23<-AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total']
fates$simpleExpand<-pop23/nrow(fates)
hsiz=aggregate(fates$onedummy,list(fates$prikey),sum)
names(hsiz)<-c("prikey","hsize")
fates<-left_join(fates,hsiz,by="prikey")
fates$hsize10<-ifelse(fates$hsize<10,fates$hsize,10)
```
## Some useful strings
```{r usefulstrings}
s_mf<-c("Male", "Female")
s_status<-c("Resident","Left the Gaza Strip","Elsewhere in the Gaza Strip","Dead","Missing","Imprisoned" )
s_cause_dead_full<-c("Disease etc","Disease etc, no aid","Accident","Violent", "Not known")
s_Cause_dead_short<-c("Total violent deaths","Total nonviolent deaths")
s_governorates<-c("Northern Gaza","Gaza city","Deir al-Balah","Khan Younis","Rafah")
s_maincat<-c("Children (<18)","Women (18-64)","Elderly (65+)","Men (18-64)")
s_WEC<-"Women, elderly, and children"
```
# Define the survey design
The marginals for raking are the following:  

* Age gender related (one must either use HR04 and AgeGroups or only AgeSex):
    + AgeSex matches IDBSeparateSexesAndAgeMarginal   
    + AgeGroups matches IDBTotalAgeMarginal
    + HR04 matches IDBGenderMarginal
    
* Governorate:
    + HR00 matches governoratemarginals
    
* Household size (use one, not both)
    + match hsize10 to householdsizemarginalsMICS, or
    + match hsize10 to householdsizemarginals2017

## Categories for reporting
Needs to be done before design definitions since design incorporates data    
    
```{r cats}
fates<- fates %>% mutate(Categories=case_when(HR07< 18 ~ 1,
                                              HR04==2 & HR07>17 & HR07<65 ~ 2,
                                              HR07>64 ~3,
                                              HR04==1 & HR07>17 & HR07<65 ~ 4),
                         WEC=case_when(is.na(HR07)~0,
                                             HR04==2 | (HR07<18 | HR07>64)~1,
                                             HR04==1 & (HR07>17 & HR07<65)~0),
                         WECmiss=case_when(HR04==2 | (HR03<18 | HR03>64)~1,
                                             HR04==1 & (HR03>17 & HR03<65)~0),
                         Allmiss=if_else(HR05==5,1,0),
                         CatMiss=case_when(HR03< 18 ~ 1,
                                              HR04==2 & HR03>17 & HR03<65 ~ 2,
                                              HR03>64 ~3,
                                              HR04==1 & HR03>17 & HR03<65 ~ 4),
                         typedeath=case_when(HR06==4~1,
                                             HR06<4 ~2),
                         Dead=if_else(HR05==4,1,0),
                         Violent=if_else(HR05==4& HR06==4,1,0),
                         Nonviolent=if_else(HR05==4 & HR06<4,1,0),
                         Missing=if_else(HR05==5,1,0))
fates<- fates %>% mutate(ratedenominator = case_when(HR01>50 & (HR05==4 | HR05==2)~0.25,
                                                     HR01>5~0.5,
                                                     HR05==1~1,
                                                     HR05==2~0.5,
                                                     HR05==3~1,
                                                     HR05==4~0.5,
                                                     HR05==5~1,
                                                     HR05==6~1))

```
# The actual specification of the design
```{r design}
fates$stratum<-fates$HR00*10+fates$PSUType
uwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy)    # uw prefix means unweighted
tbs<-svytable(~HR05+HR04,uwdesign)
ftable(tbs)

#  The following should match the svytable (simple unweighted table)
xtabs(~HR05+HR04,fates)
# 
# Full raked design
rakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
svytable(~HR04+HR05,rakedesign)
svyby(~onedummy,~HR04,design=rakedesign,svytotal)
#
# Check weights
#
weights<-weights(rakedesign)
boxplot(weights)
summary(weights)
# This is a bit high variation
# so let us fix that
trimmedrakedesign<-trimWeights(rakedesign, upper=median(weights*2), lower=90)
weights<-weights(trimmedrakedesign)
boxplot(weights)
summary(weights)

# Export data file
GMS_Used_data<-model.frame(trimmedrakedesign)
GMS_Used_data<-cbind(GMS_Used_data,weights)
saveRDS(GMS_Used_data,file="GMS_at_risk_apr_18_2025.rds")
rm(GMS_Used_data)
```
## It is now possible to define other designs, and then use them as the current design 
```{r mainoutcomestab}
currentdesign<-trimmedrakedesign

tbl<-round(svyby(~onedummy,~HR05+~HR04,design=currentdesign,svytotal,vartype=c("se","ci")))
tbl
tbl$HR05<-toString(tbl$HR05,s_status)
tbl$HR04<-toString(tbl$HR04,s_mf)
names(tbl)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tbl
tblT<-round(svyby(~onedummy,~HR05,design=currentdesign,svytotal,vartype=c("se","ci")))

tblT$HR05<-toString(tblT$HR05,s_status)
tblT$Gender<-rep("Total",length(s_status))
tblT<-tblT[,c("HR05","Gender","onedummy","se","ci_l","ci_u")]
tblT
names(tblT)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tblT
tblT<-bind_rows(tbl,tblT)
mainoutcomestable<-tblT%>% 
  group_by(Gender) %>%
  gt(row_group_as_column = TRUE) %>%
  tab_header(
    title = "Gaza Strip Survey main status outcomes") %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size") %>%
  tab_source_note(source_note = paste("Based on Gaza Mortality Survey ",maindata))  %>% 
    cols_merge(
       columns=c(ci_l,ci_u),
       pattern="({1}, {2})")%>%
    cols_label(se~"Standard<br>error",
               ci_l~"Confidence<br>Interval",
               .fn = md) %>% 
     fmt_number(n_sigfig = 3)
mainoutcomestable
saveTable(mainoutcomestable)
```
## Main table of deaths
Baseline deaths are IDB estimated deaths 2023 6348, 2024 6188,2025 6043  87 days in 2023\
#https://www.census.gov/data-tools/demo/idb/#/table?dashboard_page=sources&COUNTRY_YR_ANIM=2023&CCODE_SINGLE=XG&subnat_map_admin=ADM1&CCODE=XG&COUNTRY_YEAR=2023&menu=tableViz&TABLE_YEARS=2023,2024,2025&TABLE_USE_RANGE=N&TABLE_USE_YEARS=Y&TABLE_STEP=1&TABLE_ADD_YEARS=2023,2024,2025&quickReports=CUSTOM&CUSTOM_COLS=CDR,DEATHS \
Downloaded 8.2.2025\
Median day of fieldwork is Jan 2, 2025
```{r maintab}
end <- as.Date("2023-12-31")
start <- strptime("2023-10-06", format="%Y-%m-%d", tz="UTC")
DaysIn2023<-as.numeric(difftime(as.POSIXct(end), as.POSIXct(start, tz="UTC"), units="days"))
DaysIn2025=2
baseline<-6348*DaysIn2023/365+6188+6043*DaysIn2025/365    # might be considered slightly misplaced concreteness
# Define a function to make it easier to repeat
#### Makemaintable function ####
makemaintable<-function(currentdesign,designdescription) {

    #First select violent deaths - HR06==4
    tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
    tb1$Categories<-toString(tb1$Categories,s_maincat)
    tbwec<- svyby(~onedummy,~WEC,design=subset(currentdesign,HR06==4 & WEC==1),svytotal,vartype=c("se","ci"))
    names(tbwec)[1]<-"Categories"  
    tbwec$Categories<-s_WEC
    tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
    names(tb2)[1]<-"Categories"
    tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
    tb2
    
    tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
    tb3<-data.frame(Categories=paste(s_WEC," (%)"),onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    tb3
    tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
    names(tb4)[1]<-"Categories"
    tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
    tb4$Categories<-toString(tb4$Categories,s_maincat)
    tb4
    tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
    names(tb5)[1]<-"Categories"
    tb5[1]<-"Total missing persons"
    tb5
    tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
    tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    names(tb6)[1]<-"Categories"
    tb6
    
    # make a collected frame for the table
    tb1$type<-"Violent deaths"
    tb2$type<-"Violent deaths"
    tb2$type[2]<-"Nonviolent deaths"
    tb3$type<-"Violent deaths"
    tb4$type<-"Missing persons"
    tb5$type<-"Missing persons"
    tb6$type<-"Missing persons"
    tbwec$type<-"Violent deaths"
    #
    MF<-rbind(tb1,tbwec,tb2[1,],tb3)
    
    eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
    MF<-rbind(MF,tb2[2,],eframe)
    
    MF<-rbind(MF,tb4, tb5,tb6)
    MF
    row_number(MF)
    return(MF%>% select(type,Categories,onedummy,se,ci_l,ci_u) %>%
      group_by(type) %>%
      gt(row_group_as_column = TRUE) %>%
      fmt_number(n_sigfig = 3)%>%
      tab_header(
        title = md("Estimates of violent, nonviolent mortality and missing persons<br>by broad age and gender groups")) %>%
        tab_source_note(source_note =designdescription ) %>%
        tab_source_note(source_note = paste("Based on ",maindata))  %>% 
      cols_merge(
        columns=c(ci_l,ci_u),
        pattern="({1}, {2})")%>%
      cols_label(se~"Standard<br>error",
                 ci_l~"Confidence<br>Interval",
                 onedummy="Estimate",
                 .fn = md)
      )

}

trimraketable<-makemaintable(trimmedrakedesign,"Raked to age,gender, governorate, and household size")
trimraketable
saveTable(trimraketable)

```
## Table of violent and nonviolent deaths by governorate
```{r governoratetable}
## Make a table of deaths per governorate
tb1<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))  # all deaths by governorate
tb1$type<-"Total"
names(tb1)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb1$Governorate<-toString(tb1$Governorate,s_governorates)
tb1
tb2<-round(ftable(svytotal(~onedummy,design=subset(currentdesign,HR05==4,vartype=c("se","ci"))) ))     #total deaths, all governorates
tb2ci<-ftable(round(confint(svytotal(~onedummy,design=subset(currentdesign,HR05==4) ))))
tb2<-data.frame(Governorate="Total",Estimate=tb2[1,1],se=tb2[1,2],ci_l=tb2ci[1,1],ci_l=tb2ci[1,2],type="Total")
names(tb2)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb2[1,1]<-"Total"
tb2
tb3<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # Total violent / nonviolent
tb3$type=c("Violent deaths","Nonviolent deaths")
names(tb3)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb3$Governorate<-"Total"
tb3
tb4<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==1),svytotal,vartype=c("se","ci")))   # violent 
tb4$type<-"Violent deaths"
names(tb4)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb4$Governorate<-toString(tb4$Governorate,s_governorates)
tb4
tb5<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==2),svytotal,vartype=c("se","ci")))   # Nonviolent 
tb5$type<-"Nonviolent deaths"
names(tb5)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb5$Governorate<-toString(tb5$Governorate,s_governorates)
tb5
# collect t he governorate table
govtab<-rbind(tb1,tb2,tb4,tb3[1,],tb5,tb3[2,])
governoratetable<-govtab %>% select(type,Governorate,Estimate,se,ci_l,ci_u) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Estimates of violent, nonviolent mortality<br>by governorate of residence on October 6, 2023")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_merge(
    columns=c(ci_l,ci_u),
    pattern="({1}, {2})")%>%
  cols_label(se~"Standard<br>error",
             ci_l~"Confidence<br>Interval",
             .fn = md)
governoratetable
saveTable(governoratetable)
```
## Crude death rates
Calculate crude death rate, changed from initial version and added confidence intervals.
Also added estimate of rates for the whole period, and the annualized rate.
The annualized rate is the yearly rate that, if valid, throughout the survey period, would result approximately in the total number of deaths observed.
The approximation occurs because the calculation depends on simplifying assumptions about exposure.
Note that the Crude death rate calculated here is different from the probability of dying
```{r CDRtable}
ViolentCDR<-svyratio(~Violent,~ratedenominator,design=currentdesign)
NonviolentCDR<-svyratio(~Nonviolent,~ratedenominator,design=currentdesign)
TotalCDR<-svyratio(~Dead,~ratedenominator,design=currentdesign)
ViolentCDRCI<-confint(svyratio(~Violent,~ratedenominator,design=currentdesign))
NonviolentCDRCI<-confint(svyratio(~Nonviolent,~ratedenominator,design=currentdesign))
TotalCDRCI<-confint(svyratio(~Dead,~ratedenominator,design=currentdesign))



tabRatesA<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000*12/15,
                    SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000*12/5,
                    l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000*12/15,
                    u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000*12/15)

AnnualizedCDRTable<-tabRatesA %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Annualized mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
  cols_merge(
    columns=c(l_ci,u_ci),
    pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)
AnnualizedCDRTable
saveTable(AnnualizedCDRTable)
#
tabRates<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000,
                     SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000,
                     l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000,
                     u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000)
CDRTable<-tabRates %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
  columns=c(l_ci,u_ci),
  pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)

CDRTable
saveTable(CDRTable)
```
In comparison Jammaludine et al writes: "We estimated an annualised rate of deaths from
traumatic injury of 39·3 per 1000 people (95% CI
35·7–49·4) or 1·1 per 10 000 person-days. In comparison,
the crude death rate from all causes in 2022 was 2·8 per
1 000 people,28 yielding a rate ratio of 14·0 (95% CI
12·8–17·6).  
  
Different version of CDR to get confidence intervals.  
Strategy is to count deaths as 1, and exposure as  
1 if living (not dead, in prison, missing, moved within the Gaza Strip, not known)
0.5 if dead (assuming half exposure time)
0.5 migrated (again assuming half exposure time)
0.5 if child born during the period (again, half exposure time).
0.25 if child is born during period and migrated
Then rate=sum(deaths*w)/sum(exposure*w), SE can be obtained with linearization.
annualized rate is rate*12/5
standard error of annualized rate is SE(rate)*12/15  (because var(e*k)=var(e)*k^2, and SE = sqrt(var)

## Sensitivity
The basic idea is to run the main table with different rakings, and also with the exclusion of team 9. 
```{r sensitivity}
makemaintableDF<-function(currentdesign) {
  
  #First select violent deaths - HR06==4
  tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
  tb1$Categories<-toString(tb1$Categories,s_maincat)
  tb1    
  tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
  names(tb2)[1]<-"Categories"
  tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
  tb2
  tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
  tb3<-data.frame(Categories=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  tb3
  tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
  names(tb4)[1]<-"Categories"
  tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
  tb4$Categories<-toString(tb4$Categories,s_maincat)
  tb4
  tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
  names(tb5)[1]<-"Categories"
  tb5[1]<-"Total missing persons"
  tb5
  tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
  tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  names(tb6)[1]<-"Categories"
  tb6
  
  # make a collected frame for the table
  tb1$type<-"Violent deaths"
  tb2$type<-"Violent deaths"
  tb2$type[2]<-"Nonviolent deaths"
  tb3$type<-"Violent deaths"
  tb4$type<-"Missing persons"
  tb5$type<-"Missing persons"
  tb6$type<-"Missing persons"
  
  #Groupframe<-data.frame(Categories="Violent deaths", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(tb1,tb2[1,],tb3)
  #Groupframe<-data.frame(Categories="Nonviolent deaths", onedummy="",se="",ci_l="",ci_u="")
  eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
  MF<-rbind(MF,tb2[2,],eframe)
  #Groupframe<-data.frame(Categories="Missing persons", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(MF,tb4, tb5,tb6)
  return(MF)
}
usedT<-cbind(makemaintableDF(trimmedrakedesign)[,1:3],data.frame(Raking=rep("1. Main model, full rake, trimmed weights",14)))
names(usedT)<-c("Categories","UsedE","UsedSE","Raking")
eqwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~simpleExpand)
eqd<-cbind(makemaintableDF(eqwdesign)[,1:3],data.frame(Raking=rep("7. Unadjusted equal probability",14)))
names(eqd)<-c("Categories","EQE","EQSE","Raking")
eqd
# Make no Gaza 9 team design. The idea is to make it as if gaza9 never was part of the sample, i.e., not subsetting and retaining sample structure.
# It is debatable what is best.But the choice only affects standard errors.
nog9fates<-subset(fates,Interviewer != "gaza9")
nog9design<-rake(svydesign(data=nog9fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
noGaza9<-cbind(makemaintableDF(nog9design)[,1:3],data.frame(Raking=rep("2. Team 9 excluded",14)))   
names(noGaza9)<-c("Categories","noGE","noGSE","Raking")
# raked, no trim
noTrim<-cbind(makemaintableDF(rakedesign)[,1:3],data.frame(Raking=rep("4. Full rake, no weight trim",14)))
names(noTrim)<-c("Categories","noTrimE","noTrimSE", "Raking")
# No household adjustment
NoHHdesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, governoratemarginals))
NoHH<-cbind(makemaintableDF(NoHHdesign)[,1:3],data.frame(Raking=rep("6. No household size adjustment",14)))
names(NoHH)<-c("Categories","NOHHE","NOHH_SE","Raking")
NoHH
#
# Household adjustment using 2017 HHsizes
HHPDesign<-rake(svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), 
                population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginals2017,governoratemarginals))
HH2017<-cbind(makemaintableDF(HHPDesign)[,1:3],data.frame(Raking=rep("3. Census 2017 household size adjustment",14)))   
names(HH2017)<-c("Categories","HH2017_E","HH2017_SE","Raking")

# Use PCBS Population
PCBSPop23<-2226544+sum(birthsex)
PCBSAdjust=PCBSPop23/USCBPopAtRisk
# Adjust the marginals
PCBSTotalAgeMarginal<-IDBTotalAgeMarginal %>% mutate(Freq=Freq*PCBSAdjust)

PCBSGenderMarginal<-IDBGenderMarginal%>% mutate(Freq=Freq*PCBSAdjust)
PCBShouseholdsizemarginalsMICS<-householdsizemarginalsMICS%>% mutate(Freq=Freq*PCBSAdjust)
PCBSgovernoratemarginal<-governoratemarginals%>% mutate(Freq=Freq*PCBSAdjust)
# Full raked PCBS design
PCBSrakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(PCBSTotalAgeMarginal, PCBSGenderMarginal, PCBShouseholdsizemarginalsMICS,PCBSgovernoratemarginal))
PCBSPop<-cbind(makemaintableDF(PCBSrakedesign)[,1:3],data.frame(Raking=rep("5. PCBS population, full rake",14)))
names(PCBSPop)<-c("Categories","PCBSE","PCBS_SE","Raking")

# bind together
senstableDF<-cbind(usedT[1:3],eqd[,2:3],noGaza9[,2:3],noTrim[,2:3],NoHH[2:3],HH2017[2:3],PCBSPop[,2:3])   # Omit raking
senstableDF$type<-c(rep("Violent deaths",5),"Nonviolent deaths %",rep("Nonviolent deaths",2),rep("Missing persons",5),"Missing persons %")
senstableDF
senstab<-senstableDF %>% select(Categories,UsedE, UsedSE,noGE,noGSE,HH2017_E,HH2017_SE,noTrimE,noTrimSE,PCBSE,PCBS_SE,NOHHE,NOHH_SE,EQE,EQSE,type) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  opt_table_font(size=9) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = "Sensitivy of estimates to model assumptions") %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  tab_spanner(
    label = md("<br>Main model"),
    columns = c(UsedE,UsedSE)
  ) %>% 
  tab_spanner(
    label = md("Unadjusted<br>equal probability"),
    columns = c(EQE,EQSE)) %>% 
          tab_spanner(
            label = md("<br>Team 9 excluded"),
            columns = c(noGE,noGSE)) %>%
  tab_spanner(
    label = md("Full rake,<br>no weight trim"),
    columns = c(noTrimE,noTrimSE)) %>%
  tab_spanner(
    label = md("Census 2017<br>household size adjustment"),
    columns = c(HH2017_E,HH2017_SE)) %>%
  tab_spanner(
    label = md("No houshold<br>size adjustment"),
    columns = c(NOHHE,NOHH_SE)) %>%
  tab_spanner(
    label = md("PCBS population<br>Full rake"),
    columns = c(PCBSE,PCBS_SE)) %>%
  cols_label(UsedE="Estimate",
             UsedSE~"SE",
             EQE="Estimate",
             EQSE~"SE",
             noGE="Estimate",
             noGSE~"SE",
             noTrimE="Estimate",
             noTrimSE~"SE",
             NOHHE="Estimate",
             NOHH_SE~"SE",
             PCBSE="Estimate",
             PCBS_SE="SE",
             HH2017_E="Estimate",HH2017_SE="SE",
             .fn = md) %>%
  tab_footnote(
    locations = cells_column_labels(c(UsedSE)),
    footnote = "SE: Standard error")

senstab
saveTable(senstab)

```
Plot errorbars 
```{r plot,  fig.width=10,fig.height=8}
# First, make a dataset that works. Instead of using melt, cast and reshape esoterica, simply rbind the constituent sets.
names(usedT)[2:3]<-names(eqd)[2:3]<-names(noGaza9)[2:3]<-names(noTrim)[2:3]<-names(PCBSPop)[2:3]<-names(NoHH)[2:3]<-names(HH2017)[2:3]<-c("Estimate", "SE")
usedT$Type<-eqd$Type<-noGaza9$Type<-noTrim$Type<-PCBSPop$Type<-NoHH$Type<-HH2017$Type<-c(rep("Violent deaths",6),rep("Nonviolent deaths",2),rep("Missing persons",6))
longsensetab<-rbind(usedT,noGaza9,HH2017,noTrim,PCBSPop,NoHH,eqd)
dodge<-position_dodge(width=-0.9)   # creates warning in ggplot, but orders legend
gplotdata<-longsensetab %>%filter(!grepl("issing",Categories)) %>%
                         filter(!grepl(", and",Categories))
sensitivplot<-ggplot(gplotdata,aes(x=paste(Type,Categories,sep=" "),y=Estimate,group=Raking,color=Raking ))+
  geom_pointrange(aes(ymin=Estimate-1.96*SE,ymax=Estimate+1.96*SE),position=dodge)+
  xlab("")+ylab("Persons (point estimate and lower and upper confidence bounds)")+
  coord_flip()+
  theme_light()+ theme(legend.position="bottom")+guides(color=guide_legend(ncol=3))
  
sensitivplot
pdf("Sensitivityplot.pdf", width=7.5,height=7,paper="letter")
print(sensitivplot)
dev.off()

```
End of code