Change log April 22, 2025:

Introduction

Library loading and some utility functions

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.2.0
## ✔ purrr     0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
library(webshot2)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
#library(expss)
library(gt)    # must be loaded after expss otherwise tables crash

agebounds<-function(cuts){
  # create typical age labels from a cut string
  out<-character()
  for (i in 1:(length(cuts)-1)) {
    lower<-cuts[i]+1
    if ((is.infinite(lower)) || (lower<0)) lower<-0
    upper<-cuts[i+1]
    if (lower<10) strlower<-paste(" ",lower,sep="") else strlower<-paste(lower)
    if (i<length(cuts)-1) {out<-append(out,paste(strlower,"-",upper,sep=''))}
     else {out<-append(out,paste(lower,"+",sep=''))}
  }
  return(out)
}

toString<-function(x,str) {
  # Stupid function to put in labels in tables (alternative to using factors or strings the whole way)
  # Replaces numbers with corresponding strings
  max<-length(x)
  out<-vector()
  for (i in 1:max) { 
    value<-x[i]
    replacement<-str[value]
    out<-append(out,replacement)   
  }
  return(out)
}

saveTable<-function(table) {
  # Saves a gt table in two formats - word and pdf
  fname<-deparse(match.call()$table)
  gtsave(table,paste(fname,".docx",sep=""))
  gtsave(table,paste(fname,".pdf",sep=""))
}

Output R version

R.version.string
## [1] "R version 4.2.1 (2022-06-23 ucrt)"

Age groups defintions

If one wants to have different age groups, this is the place to change - not in the actual age group/tabulation code The agebreaks are suitable to the cut function, and the agstr contains label strings for the age groups

agebreaks<-c(-1, 4, 11, 17,29, 39, 64, Inf)
agestr<-agebounds(agebreaks)

#Load data files
Load the roster file, the births file and the IDB population file
Note that since knitr is used, it is best to put the data in the default directory
In original IDB download: Changed 100+ to 100, changed variable names (done in excel)
https://www.census.gov/data-tools/demo/idb/#/pop?dashboard_page=country&COUNTRY_YR_ANIM=2025&CCODE=XG&popPages=BYAGE&menu=popViz&show_countries=n&CCODE_SINGLE=XG&subnat_map_admin=ADM1&POP_YEARS=2023,2024

maindata<-"GMS Household roster.sav"   # note this file has Khan Younis and Deir al Balah switched in order as in the questionnaire
birthsdata="GMS Births.sav"
s_govnames<-c("North Gaza"=1,"Gaza City"=2,"Deir al Balah"=3,"Khan Younis"=4,"Rafah"=5) 
fates1 <- 
  haven::read_sav(maindata)
# recode governorates to reduce my confusion about order
fates1<-fates1 %>% mutate(HR00=case_when(HR00==1~1,
                                      HR00==2~2,
                                      HR00==3~4,
                                      HR00==4~3,
                                      HR00==5~5))
#val_lab(fates1$HR00)<-s_govnames

# Births
births<- 
  haven::read_sav(birthsdata)
# recode governorates to reduce my confusion about order
births<-births %>% mutate(HR00=case_when(HR00==1~1,
                                       HR00==2~2,
                                       HR00==3~4,
                                       HR00==4~3,
                                       HR00==5~5))
#val_lab(births$HR00)<-s_govnames
numbirths=nrow(births)
numbirths
## [1] 357
birthsex<-table(births$HR04)

Population <- readxl::read_excel("Population Gaza Single year age groups IDB_2023.xlsx")
# From original IDB download: Changed 100+ to 100, changed variable names
# View(Population)
# Calculate adjustment to population at risk
# Needs to be done on all marginals
USCBPop23<-Population$Total[1]     # Total population
Persons.in.roster.file<-nrow(fates1)
AverageExpansion<-USCBPop23/Persons.in.roster.file
AverageExpansion
## [1] 215.6839
birthsex<-round(birthsex*AverageExpansion)
TotalAtRisk<-USCBPop23+sum(birthsex)     #using estimated births, rather than projected (due to uncertainty in projected due to conflict)
USCBPopAtRisk<-TotalAtRisk
USCBScaleFactor<-USCBPopAtRisk/USCBPop23
USCBScaleFactor
## [1] 1.036694
# The births will be distributed to boys and girls at 0 age.
Population$Total[1]<-TotalAtRisk
Population$Male[1]<-birthsex[1]+Population$Male[1]
Population$Female[2]<-birthsex[2]+Population$Female[2]
Population$Male[2]<-birthsex[1]+Population$Male[2]
Population$Female[1]<-birthsex[2]+Population$Female[1]

Age distributions

Census Bureau age distribution

Note that raking marginals for age and gender are also produced here.
Two sets of age/gender marginals. One is the traditional with separate age and gender, the other collects age and gender in one variable. The latter will force age and gender to match IDB exactly, but at the cost of bigger weight variation than with separate age and gender.

Population$AgeGroups<-as.integer(cut(as.numeric(Population$GROUP), breaks = agebreaks))   #Note change to 0 to 4
AgeGroups<-as.data.frame(group_by(Population,AgeGroups) %>% summarise(Males=sum(Male),
                                               Females=sum(Female)))

AgeGroups<-AgeGroups %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                .default=agestr[AgeGroups]),
                                AgeGroups=case_when(is.na(AgeGroups)~100,
                                                    .default=AgeGroups))
AgeGroups   
##   AgeGroups   Males Females AgeGr
## 1         1  181716  173794   0-4
## 2         2  200191  188991  5-11
## 3         3  165430  157849 12-17
## 4         4  248133  240116 18-29
## 5         5  130574  133015 30-39
## 6         6  146967  149998 40-64
## 7         7   29944   28670   65+
## 8       100 1102955 1072433 Total
AgeGroups$PctFem<-AgeGroups$Females/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
AgeGroups$PctMal<-AgeGroups$Males/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100

IDBAgeTable <- AgeGroups %>% select(AgeGr,Males,PctMal, Females,PctFem)%>% gt()%>%
  tab_header(
    title = "Gaza Strip population at risk 2023, broad age groups",
    subtitle = "based on the International Data Base, U..S Census Bureau"
      ) %>%   tab_source_note(
    source_note = "Percentages are based on the total population"
    ) %>% tab_source_note(
      source_note = "Births during survey period added"
    )  %>%   tab_source_note(
  source_note = "Source: https://www.census.gov/ International Programs Main/ Data /International Database."
  ) %>% 
  tab_source_note(
    source_note = "Downloaded 25.01.2025" 
  ) %>% 
  cols_label(
   AgeGr="Age", PctMal="Male percent", PctFem="Female percent" 
  ) %>% 
  fmt_number(n_sigfig = 3) %>%
  fmt_number(
    columns=c(PctMal,PctFem),
    decimals=1
  )

  IDBAgeTable
Gaza Strip population at risk 2023, broad age groups
based on the International Data Base, U..S Census Bureau
Age Males Male percent Females Female percent
0-4 182,000 8.4 174,000 8.0
5-11 200,000 9.2 189,000 8.7
12-17 165,000 7.6 158,000 7.3
18-29 248,000 11.4 240,000 11.0
30-39 131,000 6.0 133,000 6.1
40-64 147,000 6.8 150,000 6.9
65+ 29,900 1.4 28,700 1.3
Total 1,100,000 50.7 1,070,000 49.3
Percentages are based on the total population
Births during survey period added
Source: https://www.census.gov/ International Programs Main/ Data /International Database.
Downloaded 25.01.2025
  saveTable(IDBAgeTable)
  #
  # Construct marginals based on the IDB table
  #
  IDBGenderMarginal<-data.frame(HR04=c(1,2), Freq=c(AgeGroups$Males[nrow(AgeGroups)],AgeGroups$Females[nrow(AgeGroups)]))  # IDB Gender 
  IDBGenderMarginal
##   HR04    Freq
## 1    1 1102955
## 2    2 1072433
  IDBTotalAge<-AgeGroups$Males[-nrow(AgeGroups)]+ AgeGroups$Females[-nrow(AgeGroups)]
  IDBTotalAgeMarginal<-data.frame(AgeGroups=seq(1,length(IDBTotalAge)),Freq=IDBTotalAge)
  IDBTotalAgeMarginal
##   AgeGroups   Freq
## 1         1 355510
## 2         2 389182
## 3         3 323279
## 4         4 488249
## 5         5 263589
## 6         6 296965
## 7         7  58614
  sum(IDBTotalAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
## [1] TRUE
  # Also construct a marginal that is a blend of age and gender - to get that exact
  IDBMales<-data.frame(AgeSex=paste(agestr,"Male"),Freq=AgeGroups$Males[-nrow(AgeGroups)])
  IDBFemales<-data.frame(AgeSex=paste(agestr,"Female"),Freq=AgeGroups$Females[-nrow(AgeGroups)])
  IDBSeparateSexesAndAgeMarginal<-bind_rows(IDBMales,IDBFemales)
  IDBSeparateSexesAndAgeMarginal
##          AgeSex   Freq
## 1      0-4 Male 181716
## 2     5-11 Male 200191
## 3    12-17 Male 165430
## 4    18-29 Male 248133
## 5    30-39 Male 130574
## 6    40-64 Male 146967
## 7      65+ Male  29944
## 8    0-4 Female 173794
## 9   5-11 Female 188991
## 10 12-17 Female 157849
## 11 18-29 Female 240116
## 12 30-39 Female 133015
## 13 40-64 Female 149998
## 14   65+ Female  28670
  sum(IDBSeparateSexesAndAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true
## [1] TRUE

Age distribution in the survey data file

# add roster and birth together

fates<-rbind(fates1,births)  
fates$AgeGroups<-as.integer(cut(as.numeric(fates$HR03), breaks = agebreaks))
fates<-fates %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                     .default=agestr[AgeGroups]),
                                        Gender=case_when(HR04==1~'Male',
                                        HR04==2~'Female'))
fates$AgeSex<-paste(fates$AgeGr,fates$Gender,sep=" ")

RosterAge<-fates %>% group_by(AgeGr,Gender)  %>% summarize(sumpop=n())
## `summarise()` has grouped output by 'AgeGr'. You can override using the
## `.groups` argument.
RosterAge<-reshape(as.data.frame(RosterAge),idvar="AgeGr", timevar="Gender",direction="wide")
RosterAge
##    AgeGr sumpop.Female sumpop.Male
## 1    0-4           630         702
## 3   5-11           715         801
## 5  12-17           611         585
## 7  18-29          1416        1264
## 9  30-39           691         764
## 11 40-64           802         917
## 13   65+            87         101
names(RosterAge)<-c('AgeGr','FemaleRoster','MaleRoster')
RosterAge<-rbind(RosterAge,data.frame(AgeGr='Total',FemaleRoster=sum(RosterAge$FemaleRoster),MaleRoster=sum(RosterAge$MaleRoster)))
RosterAge$PctFemRoster<-RosterAge$FemaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge$PctMalRoster<-RosterAge$MaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge
##    AgeGr FemaleRoster MaleRoster PctFemRoster PctMalRoster
## 1    0-4          630        702    6.2462820     6.960143
## 3   5-11          715        801    7.0890343     7.941701
## 5  12-17          611        585    6.0579020     5.800119
## 7  18-29         1416       1264   14.0392623    12.532223
## 9  30-39          691        764    6.8510807     7.574856
## 11 40-64          802        917    7.9516161     9.091810
## 13   65+           87        101    0.8625818     1.001388
## 12 Total         4952       5134   49.0977593    50.902241
SurveyAgeTable <- RosterAge %>% select(AgeGr,MaleRoster,PctMalRoster, FemaleRoster,PctFemRoster) %>% 
  gt()%>%
  tab_header(
    title = "Household members October 6, 2023 and births in survey period by broad age groups" ) %>% 
      tab_source_note(source_note = "Unweighted. Percentages are based on the total")  %>% 
      tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_label(
    AgeGr="Age",
    MaleRoster="Male",PctMalRoster="Percent male", FemaleRoster="Females",PctFemRoster="Percent female"
  ) %>% 
  fmt_number(
    columns=c(PctMalRoster,PctFemRoster),
    decimals=1
  )


SurveyAgeTable
Household members October 6, 2023 and births in survey period by broad age groups
Age Male Percent male Females Percent female
0-4 702 7.0 630 6.2
5-11 801 7.9 715 7.1
12-17 585 5.8 611 6.1
18-29 1264 12.5 1416 14.0
30-39 764 7.6 691 6.9
40-64 917 9.1 802 8.0
65+ 101 1.0 87 0.9
Total 5134 50.9 4952 49.1
Unweighted. Percentages are based on the total
Based on GMS Household roster.sav
saveTable(SurveyAgeTable)

Construct variables, data frames and vectors needed for survey design definition

Construct governorate marginals

Since the adjusted weights also should reflect what we think is reality, not only the sampling, we use marginals adjusted to the IDB Census Bureau population.

Governorate PCBS 2023 IDB adjusted Questionnaire code North South code PSUs
North Gaza 444412 418833 1 1 36
Gaza City 749100 705983 2 2 72
Deir al Balah 319208 300835 4 3 28
Khan Younis 438557 413315 3 4 42
Rafah 275267 259423 5 5 22


The SPSS data file uses the questionnaire code, but that has been recoded into the north south code (above)
Note that the scale factor to account for new births is applied.

governoratemarginals<-data.frame(HR00=seq(1,5,1), Freq=c( 418833,705983, 300835,413315, 259423)*USCBScaleFactor )
governoratemarginals                                    
##   HR00     Freq
## 1    1 434201.8
## 2    2 731888.6
## 3    3 311873.9
## 4    4 428481.3
## 5    5 268942.4

Construct household size marginals

Note that we have to scale to current population and since we are using the roster file we have to use marginals that are population in each size group There are two versions using household sizes from the 2017 census, and from the 2019-2020 MICS survey First the census version

householdsize2017 <-c(10661,31474,34084,43481,49305, 50239,42741,30693,19041,(10547 + 12444))
hhsizes<-c(seq(1,9,1),11.43151668)
popbyhouseholdsize2017<-householdsize2017*hhsizes
scalefactor2017to2023<-sum(IDBGenderMarginal$Freq)/sum(popbyhouseholdsize2017)
householdsizemarginals2017<-popbyhouseholdsize2017*scalefactor2017to2023
householdsizemarginals2017<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginals2017)
householdsizemarginals2017
##    hsize10      Freq
## 1        1  12357.99
## 2        2  72967.87
## 3        3 118528.16
## 4        4 201608.69
## 5        5 285766.10
## 6        6 349415.35
## 7        7 346810.68
## 8        8 284628.95
## 9        9 198647.00
## 10      10 304657.21
# Here from MICS 2019-2020 household file (Downloaded from https://mics.unicef.org/surveys)
#the initial data are persons in each household size group
hhsizemarginalsMICS<-c(110,
                          501,
                          1069,
                          1841,
                          2458,
                          3218,
                          3289,
                          2255,
                          1816,
                          2502)  # From 1 to 10+
MICSScalefactor<-sum(IDBGenderMarginal$Freq)/sum(hhsizemarginalsMICS)
householdsizemarginalsMICS<-hhsizemarginalsMICS*MICSScalefactor

householdsizemarginalsMICS<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginalsMICS)
householdsizemarginalsMICS
##    hsize10      Freq
## 1        1  12555.36
## 2        2  57183.98
## 3        3 122015.31
## 4        4 210131.14
## 5        5 280555.31
## 6        6 367301.46
## 7        7 375405.38
## 8        8 257384.96
## 9        9 207277.64
## 10      10 285577.46
# Difference between distributions based on 2017 and MICS
round(householdsizemarginals2017$Freq-householdsizemarginalsMICS$Freq)
##  [1]   -197  15784  -3487  -8522   5211 -17886 -28595  27244  -8631  19080
sum(householdsizemarginals2017$Freq)-sum(householdsizemarginalsMICS$Freq)    # Should be 0
## [1] 0

Get household sizes into the data file

  • Two variables:
    • hsize : the household size
    • hsize10: the household size, but everything over 10 is set at 10. This is the one used to rake
fates$onedummy<-1   # simple counting variable that is also used initially for weights
pop23<-AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total']
fates$simpleExpand<-pop23/nrow(fates)
hsiz=aggregate(fates$onedummy,list(fates$prikey),sum)
names(hsiz)<-c("prikey","hsize")
fates<-left_join(fates,hsiz,by="prikey")
fates$hsize10<-ifelse(fates$hsize<10,fates$hsize,10)

Some useful strings

s_mf<-c("Male", "Female")
s_status<-c("Resident","Left the Gaza Strip","Elsewhere in the Gaza Strip","Dead","Missing","Imprisoned" )
s_cause_dead_full<-c("Disease etc","Disease etc, no aid","Accident","Violent", "Not known")
s_Cause_dead_short<-c("Total violent deaths","Total nonviolent deaths")
s_governorates<-c("Northern Gaza","Gaza city","Deir al-Balah","Khan Younis","Rafah")
s_maincat<-c("Children (<18)","Women (18-64)","Elderly (65+)","Men (18-64)")
s_WEC<-"Women, elderly, and children"

Define the survey design

The marginals for raking are the following:

Categories for reporting

Needs to be done before design definitions since design incorporates data

fates<- fates %>% mutate(Categories=case_when(HR07< 18 ~ 1,
                                              HR04==2 & HR07>17 & HR07<65 ~ 2,
                                              HR07>64 ~3,
                                              HR04==1 & HR07>17 & HR07<65 ~ 4),
                         WEC=case_when(is.na(HR07)~0,
                                             HR04==2 | (HR07<18 | HR07>64)~1,
                                             HR04==1 & (HR07>17 & HR07<65)~0),
                         WECmiss=case_when(HR04==2 | (HR03<18 | HR03>64)~1,
                                             HR04==1 & (HR03>17 & HR03<65)~0),
                         Allmiss=if_else(HR05==5,1,0),
                         CatMiss=case_when(HR03< 18 ~ 1,
                                              HR04==2 & HR03>17 & HR03<65 ~ 2,
                                              HR03>64 ~3,
                                              HR04==1 & HR03>17 & HR03<65 ~ 4),
                         typedeath=case_when(HR06==4~1,
                                             HR06<4 ~2),
                         Dead=if_else(HR05==4,1,0),
                         Violent=if_else(HR05==4& HR06==4,1,0),
                         Nonviolent=if_else(HR05==4 & HR06<4,1,0),
                         Missing=if_else(HR05==5,1,0))
fates<- fates %>% mutate(ratedenominator = case_when(HR01>50 & (HR05==4 | HR05==2)~0.25,
                                                     HR01>5~0.5,
                                                     HR05==1~1,
                                                     HR05==2~0.5,
                                                     HR05==3~1,
                                                     HR05==4~0.5,
                                                     HR05==5~1,
                                                     HR05==6~1))

The actual specification of the design

fates$stratum<-fates$HR00*10+fates$PSUType
uwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy)    # uw prefix means unweighted
tbs<-svytable(~HR05+HR04,uwdesign)
ftable(tbs)
##      HR04    1    2
## HR05               
## 1         4520 4569
## 2          140  145
## 3           81   56
## 4          285  176
## 5           57    5
## 6           51    1
#  The following should match the svytable (simple unweighted table)
xtabs(~HR05+HR04,fates)
##     HR04
## HR05    1    2
##    1 4520 4569
##    2  140  145
##    3   81   56
##    4  285  176
##    5   57    5
##    6   51    1
# 
# Full raked design
rakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
svytable(~HR04+HR05,rakedesign)
##     HR05
## HR04           1           2           3           4           5           6
##    1 983616.6154  27747.0024  14841.8167  56698.8796  10783.9508   9266.5554
##    2 995888.0906  28521.4894  11775.7981  34983.9959   1111.6395    152.1662
svyby(~onedummy,~HR04,design=rakedesign,svytotal)
##   HR04 onedummy        se
## 1    1  1102955 0.0661335
## 2    2  1072433 0.0661335
#
# Check weights
#
weights<-weights(rakedesign)
boxplot(weights)

summary(weights)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   99.61  144.03  197.99  215.68  256.26  721.09
# This is a bit high variation
# so let us fix that
trimmedrakedesign<-trimWeights(rakedesign, upper=median(weights*2), lower=90)
weights<-weights(trimmedrakedesign)
boxplot(weights)

summary(weights)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   104.8   149.2   203.2   215.7   261.4   400.9
# Export data file
GMS_Used_data<-model.frame(trimmedrakedesign)
GMS_Used_data<-cbind(GMS_Used_data,weights)
saveRDS(GMS_Used_data,file="GMS_at_risk_apr_18_2025.rds")
rm(GMS_Used_data)

It is now possible to define other designs, and then use them as the current design

currentdesign<-trimmedrakedesign

tbl<-round(svyby(~onedummy,~HR05+~HR04,design=currentdesign,svytotal,vartype=c("se","ci")))
tbl
##     HR05 HR04 onedummy   se   ci_l    ci_u
## 1.1    1    1   982822 7004 969095  996550
## 2.1    2    1    28285 3118  22175   34396
## 3.1    3    1    15102 2256  10680   19523
## 4.1    4    1    56705 3995  48874   64536
## 5.1    5    1    11071 1983   7186   14957
## 6.1    6    1     9422 1670   6149   12696
## 1.2    1    2   995139 6370 982653 1007624
## 2.2    2    2    28911 4082  20912   36911
## 3.2    3    2    11923 2158   7694   16152
## 4.2    4    2    34712 3751  27361   42063
## 5.2    5    2     1137  559     42    2233
## 6.2    6    2      157  160   -156     470
tbl$HR05<-toString(tbl$HR05,s_status)
tbl$HR04<-toString(tbl$HR04,s_mf)
names(tbl)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tbl
##                          Status Gender Estimate   se   ci_l    ci_u
## 1.1                    Resident   Male   982822 7004 969095  996550
## 2.1         Left the Gaza Strip   Male    28285 3118  22175   34396
## 3.1 Elsewhere in the Gaza Strip   Male    15102 2256  10680   19523
## 4.1                        Dead   Male    56705 3995  48874   64536
## 5.1                     Missing   Male    11071 1983   7186   14957
## 6.1                  Imprisoned   Male     9422 1670   6149   12696
## 1.2                    Resident Female   995139 6370 982653 1007624
## 2.2         Left the Gaza Strip Female    28911 4082  20912   36911
## 3.2 Elsewhere in the Gaza Strip Female    11923 2158   7694   16152
## 4.2                        Dead Female    34712 3751  27361   42063
## 5.2                     Missing Female     1137  559     42    2233
## 6.2                  Imprisoned Female      157  160   -156     470
tblT<-round(svyby(~onedummy,~HR05,design=currentdesign,svytotal,vartype=c("se","ci")))

tblT$HR05<-toString(tblT$HR05,s_status)
tblT$Gender<-rep("Total",length(s_status))
tblT<-tblT[,c("HR05","Gender","onedummy","se","ci_l","ci_u")]
tblT
##                          HR05 Gender onedummy    se    ci_l    ci_u
## 1                    Resident  Total  1977961 11502 1955418 2000505
## 2         Left the Gaza Strip  Total    57197  6017   45404   68989
## 3 Elsewhere in the Gaza Strip  Total    27024  3744   19687   34362
## 4                        Dead  Total    91417  6447   78782  104052
## 5                     Missing  Total    12209  2198    7901   16516
## 6                  Imprisoned  Total     9580  1692    6264   12895
names(tblT)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tblT
##                        Status Gender Estimate    se    ci_l    ci_u
## 1                    Resident  Total  1977961 11502 1955418 2000505
## 2         Left the Gaza Strip  Total    57197  6017   45404   68989
## 3 Elsewhere in the Gaza Strip  Total    27024  3744   19687   34362
## 4                        Dead  Total    91417  6447   78782  104052
## 5                     Missing  Total    12209  2198    7901   16516
## 6                  Imprisoned  Total     9580  1692    6264   12895
tblT<-bind_rows(tbl,tblT)
mainoutcomestable<-tblT%>% 
  group_by(Gender) %>%
  gt(row_group_as_column = TRUE) %>%
  tab_header(
    title = "Gaza Strip Survey main status outcomes") %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size") %>%
  tab_source_note(source_note = paste("Based on Gaza Mortality Survey ",maindata))  %>% 
    cols_merge(
       columns=c(ci_l,ci_u),
       pattern="({1}, {2})")%>%
    cols_label(se~"Standard<br>error",
               ci_l~"Confidence<br>Interval",
               .fn = md) %>% 
     fmt_number(n_sigfig = 3)
mainoutcomestable
Gaza Strip Survey main status outcomes
Status Estimate Standard
error
Confidence
Interval
Male Resident 983,000 7,000 (969,000, 997,000)
Left the Gaza Strip 28,300 3,120 (22,200, 34,400)
Elsewhere in the Gaza Strip 15,100 2,260 (10,700, 19,500)
Dead 56,700 4,000 (48,900, 64,500)
Missing 11,100 1,980 (7,190, 15,000)
Imprisoned 9,420 1,670 (6,150, 12,700)
Female Resident 995,000 6,370 (983,000, 1,010,000)
Left the Gaza Strip 28,900 4,080 (20,900, 36,900)
Elsewhere in the Gaza Strip 11,900 2,160 (7,690, 16,200)
Dead 34,700 3,750 (27,400, 42,100)
Missing 1,140 559 (42.0, 2,230)
Imprisoned 157 160 (−156, 470)
Total Resident 1,980,000 11,500 (1,960,000, 2,000,000)
Left the Gaza Strip 57,200 6,020 (45,400, 69,000)
Elsewhere in the Gaza Strip 27,000 3,740 (19,700, 34,400)
Dead 91,400 6,450 (78,800, 104,000)
Missing 12,200 2,200 (7,900, 16,500)
Imprisoned 9,580 1,690 (6,260, 12,900)
Raked to age,gender, governorate, and household size
Based on Gaza Mortality Survey GMS Household roster.sav
saveTable(mainoutcomestable)

Main table of deaths

Baseline deaths are IDB estimated deaths 2023 6348, 2024 6188,2025 6043 87 days in 2023
#https://www.census.gov/data-tools/demo/idb/#/table?dashboard_page=sources&COUNTRY_YR_ANIM=2023&CCODE_SINGLE=XG&subnat_map_admin=ADM1&CCODE=XG&COUNTRY_YEAR=2023&menu=tableViz&TABLE_YEARS=2023,2024,2025&TABLE_USE_RANGE=N&TABLE_USE_YEARS=Y&TABLE_STEP=1&TABLE_ADD_YEARS=2023,2024,2025&quickReports=CUSTOM&CUSTOM_COLS=CDR,DEATHS
Downloaded 8.2.2025
Median day of fieldwork is Jan 2, 2025

end <- as.Date("2023-12-31")
start <- strptime("2023-10-06", format="%Y-%m-%d", tz="UTC")
DaysIn2023<-as.numeric(difftime(as.POSIXct(end), as.POSIXct(start, tz="UTC"), units="days"))
DaysIn2025=2
baseline<-6348*DaysIn2023/365+6188+6043*DaysIn2025/365    # might be considered slightly misplaced concreteness
# Define a function to make it easier to repeat
#### Makemaintable function ####
makemaintable<-function(currentdesign,designdescription) {

    #First select violent deaths - HR06==4
    tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
    tb1$Categories<-toString(tb1$Categories,s_maincat)
    tbwec<- svyby(~onedummy,~WEC,design=subset(currentdesign,HR06==4 & WEC==1),svytotal,vartype=c("se","ci"))
    names(tbwec)[1]<-"Categories"  
    tbwec$Categories<-s_WEC
    tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
    names(tb2)[1]<-"Categories"
    tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
    tb2
    
    tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
    tb3<-data.frame(Categories=paste(s_WEC," (%)"),onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    tb3
    tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
    names(tb4)[1]<-"Categories"
    tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
    tb4$Categories<-toString(tb4$Categories,s_maincat)
    tb4
    tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
    names(tb5)[1]<-"Categories"
    tb5[1]<-"Total missing persons"
    tb5
    tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
    tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    names(tb6)[1]<-"Categories"
    tb6
    
    # make a collected frame for the table
    tb1$type<-"Violent deaths"
    tb2$type<-"Violent deaths"
    tb2$type[2]<-"Nonviolent deaths"
    tb3$type<-"Violent deaths"
    tb4$type<-"Missing persons"
    tb5$type<-"Missing persons"
    tb6$type<-"Missing persons"
    tbwec$type<-"Violent deaths"
    #
    MF<-rbind(tb1,tbwec,tb2[1,],tb3)
    
    eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
    MF<-rbind(MF,tb2[2,],eframe)
    
    MF<-rbind(MF,tb4, tb5,tb6)
    MF
    row_number(MF)
    return(MF%>% select(type,Categories,onedummy,se,ci_l,ci_u) %>%
      group_by(type) %>%
      gt(row_group_as_column = TRUE) %>%
      fmt_number(n_sigfig = 3)%>%
      tab_header(
        title = md("Estimates of violent, nonviolent mortality and missing persons<br>by broad age and gender groups")) %>%
        tab_source_note(source_note =designdescription ) %>%
        tab_source_note(source_note = paste("Based on ",maindata))  %>% 
      cols_merge(
        columns=c(ci_l,ci_u),
        pattern="({1}, {2})")%>%
      cols_label(se~"Standard<br>error",
                 ci_l~"Confidence<br>Interval",
                 onedummy="Estimate",
                 .fn = md)
      )

}

trimraketable<-makemaintable(trimmedrakedesign,"Raked to age,gender, governorate, and household size")
trimraketable
Estimates of violent, nonviolent mortality and missing persons
by broad age and gender groups
Categories Estimate Standard
error
Confidence
Interval
Violent deaths Children (<18) 22,800 3,090 (16,700, 28,800)
Women (18-64) 16,600 2,220 (12,200, 20,900)
Elderly (65+) 2,870 943 (1,020, 4,720)
Men (18-64) 32,900 2,700 (27,600, 38,200)
Women, elderly, and children 42,200 4,630 (33,100, 51,300)
Total violent deaths 75,200 5,920 (63,600, 86,800)
Women, elderly, and children (%) 56.2 2.94 (50.4, 61.9)
Nonviolent deaths Total nonviolent deaths 16,300 2,040 (12,300, 20,200)
Excess nonviolent deaths 8,540 2,040 (4,540, 12,500)
Missing persons Children (<18) 3,610 1,040 (1,570, 5,650)
Women (18-64) 131 134 (−133, 394)
Elderly (65+) 0 0 (0, 0)
Men (18-64) 8,470 1,650 (5,240, 11,700)
Total missing persons 12,200 2,200 (7,900, 16,500)
Women, elderly, and children 30.6 6.40 (18.1, 43.2)
Raked to age,gender, governorate, and household size
Based on GMS Household roster.sav
saveTable(trimraketable)

Table of violent and nonviolent deaths by governorate

## Make a table of deaths per governorate
tb1<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))  # all deaths by governorate
tb1$type<-"Total"
names(tb1)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb1$Governorate<-toString(tb1$Governorate,s_governorates)
tb1
##     Governorate Estimate   se  ci_l  ci_u  type
## 1 Northern Gaza    15964 3232  9630 22299 Total
## 2     Gaza city    35661 4673 26502 44820 Total
## 3 Deir al-Balah    12375 1894  8663 16087 Total
## 4   Khan Younis    18197 2468 13360 23035 Total
## 5         Rafah     9219 1816  5660 12779 Total
tb2<-round(ftable(svytotal(~onedummy,design=subset(currentdesign,HR05==4,vartype=c("se","ci"))) ))     #total deaths, all governorates
tb2ci<-ftable(round(confint(svytotal(~onedummy,design=subset(currentdesign,HR05==4) ))))
tb2<-data.frame(Governorate="Total",Estimate=tb2[1,1],se=tb2[1,2],ci_l=tb2ci[1,1],ci_l=tb2ci[1,2],type="Total")
names(tb2)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb2[1,1]<-"Total"
tb2
##   Governorate Estimate   se  ci_l   ci_u  type
## 1       Total    91417 6447 78782 104052 Total
tb3<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # Total violent / nonviolent
tb3$type=c("Violent deaths","Nonviolent deaths")
names(tb3)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb3$Governorate<-"Total"
tb3
##   Governorate Estimate   se  ci_l  ci_u              type
## 1       Total    75164 5919 63563 86766    Violent deaths
## 2       Total    16253 2039 12257 20249 Nonviolent deaths
tb4<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==1),svytotal,vartype=c("se","ci")))   # violent 
tb4$type<-"Violent deaths"
names(tb4)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb4$Governorate<-toString(tb4$Governorate,s_governorates)
tb4
##     Governorate Estimate   se  ci_l  ci_u           type
## 1 Northern Gaza    13201 2848  7619 18783 Violent deaths
## 2     Gaza city    31291 4547 22379 40203 Violent deaths
## 3 Deir al-Balah     8276 1424  5486 11066 Violent deaths
## 4   Khan Younis    15065 2155 10842 19288 Violent deaths
## 5         Rafah     7330 1629  4137 10524 Violent deaths
tb5<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==2),svytotal,vartype=c("se","ci")))   # Nonviolent 
tb5$type<-"Nonviolent deaths"
names(tb5)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb5$Governorate<-toString(tb5$Governorate,s_governorates)
tb5
##     Governorate Estimate   se ci_l ci_u              type
## 1 Northern Gaza     2763  821 1154 4372 Nonviolent deaths
## 2     Gaza city     4370  975 2460 6280 Nonviolent deaths
## 3 Deir al-Balah     4099 1173 1800 6397 Nonviolent deaths
## 4   Khan Younis     3132  890 1387 4878 Nonviolent deaths
## 5         Rafah     1889  704  509 3269 Nonviolent deaths
# collect t he governorate table
govtab<-rbind(tb1,tb2,tb4,tb3[1,],tb5,tb3[2,])
governoratetable<-govtab %>% select(type,Governorate,Estimate,se,ci_l,ci_u) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Estimates of violent, nonviolent mortality<br>by governorate of residence on October 6, 2023")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_merge(
    columns=c(ci_l,ci_u),
    pattern="({1}, {2})")%>%
  cols_label(se~"Standard<br>error",
             ci_l~"Confidence<br>Interval",
             .fn = md)
governoratetable
Estimates of violent, nonviolent mortality
by governorate of residence on October 6, 2023
Governorate Estimate Standard
error
Confidence
Interval
Total Northern Gaza 16,000 3,230 (9,630, 22,300)
Gaza city 35,700 4,670 (26,500, 44,800)
Deir al-Balah 12,400 1,890 (8,660, 16,100)
Khan Younis 18,200 2,470 (13,400, 23,000)
Rafah 9,220 1,820 (5,660, 12,800)
Total 91,400 6,450 (78,800, 104,000)
Violent deaths Northern Gaza 13,200 2,850 (7,620, 18,800)
Gaza city 31,300 4,550 (22,400, 40,200)
Deir al-Balah 8,280 1,420 (5,490, 11,100)
Khan Younis 15,100 2,160 (10,800, 19,300)
Rafah 7,330 1,630 (4,140, 10,500)
Total 75,200 5,920 (63,600, 86,800)
Nonviolent deaths Northern Gaza 2,760 821 (1,150, 4,370)
Gaza city 4,370 975 (2,460, 6,280)
Deir al-Balah 4,100 1,170 (1,800, 6,400)
Khan Younis 3,130 890 (1,390, 4,880)
Rafah 1,890 704 (509, 3,270)
Total 16,300 2,040 (12,300, 20,200)
Raked to age,gender, governorate, and household size
Based on GMS Household roster.sav
saveTable(governoratetable)

Crude death rates

Calculate crude death rate, changed from initial version and added confidence intervals. Also added estimate of rates for the whole period, and the annualized rate. The annualized rate is the yearly rate that, if valid, throughout the survey period, would result approximately in the total number of deaths observed. The approximation occurs because the calculation depends on simplifying assumptions about exposure. Note that the Crude death rate calculated here is different from the probability of dying

ViolentCDR<-svyratio(~Violent,~ratedenominator,design=currentdesign)
NonviolentCDR<-svyratio(~Nonviolent,~ratedenominator,design=currentdesign)
TotalCDR<-svyratio(~Dead,~ratedenominator,design=currentdesign)
ViolentCDRCI<-confint(svyratio(~Violent,~ratedenominator,design=currentdesign))
NonviolentCDRCI<-confint(svyratio(~Nonviolent,~ratedenominator,design=currentdesign))
TotalCDRCI<-confint(svyratio(~Dead,~ratedenominator,design=currentdesign))



tabRatesA<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000*12/15,
                    SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000*12/5,
                    l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000*12/15,
                    u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000*12/15)

AnnualizedCDRTable<-tabRatesA %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Annualized mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
  cols_merge(
    columns=c(l_ci,u_ci),
    pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)
AnnualizedCDRTable
Annualized mortality rates for the Gaza Strip
October 6, 2023 to January 5, 2025
Type CDR Standard
error
Confidence
Interval
Violent 33.1 8.01 (27.9, 38.4)
Nonviolent 7.16 2.70 (5.40, 8.93)
Total 40.3 8.74 (34.6, 46.0)
Raked to age,gender, governorate, and household size
Based on household roster and births
and IDB US Census Bureau population estimates
saveTable(AnnualizedCDRTable)
#
tabRates<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000,
                     SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000,
                     l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000,
                     u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000)
CDRTable<-tabRates %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
  columns=c(l_ci,u_ci),
  pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)

CDRTable
Mortality rates for the Gaza Strip
October 6, 2023 to January 5, 2025
Type CDR Standard
error
Confidence
Interval
Violent 41.4 3.34 (34.9, 48.0)
Nonviolent 8.95 1.13 (6.75, 11.2)
Total 50.4 3.64 (43.2, 57.5)
Raked to age,gender, governorate, and household size
Based on household roster and births
and IDB US Census Bureau population estimates
saveTable(CDRTable)

In comparison Jammaludine et al writes: “We estimated an annualised rate of deaths from traumatic injury of 39·3 per 1000 people (95% CI 35·7–49·4) or 1·1 per 10 000 person-days. In comparison, the crude death rate from all causes in 2022 was 2·8 per 1 000 people,28 yielding a rate ratio of 14·0 (95% CI 12·8–17·6).

Different version of CDR to get confidence intervals.
Strategy is to count deaths as 1, and exposure as
1 if living (not dead, in prison, missing, moved within the Gaza Strip, not known) 0.5 if dead (assuming half exposure time) 0.5 migrated (again assuming half exposure time) 0.5 if child born during the period (again, half exposure time). 0.25 if child is born during period and migrated Then rate=sum(deathsw)/sum(exposurew), SE can be obtained with linearization. annualized rate is rate12/5 standard error of annualized rate is SE(rate)12/15 (because var(ek)=var(e)k^2, and SE = sqrt(var)

Sensitivity

The basic idea is to run the main table with different rakings, and also with the exclusion of team 9.

makemaintableDF<-function(currentdesign) {
  
  #First select violent deaths - HR06==4
  tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
  tb1$Categories<-toString(tb1$Categories,s_maincat)
  tb1    
  tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
  names(tb2)[1]<-"Categories"
  tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
  tb2
  tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
  tb3<-data.frame(Categories=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  tb3
  tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
  names(tb4)[1]<-"Categories"
  tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
  tb4$Categories<-toString(tb4$Categories,s_maincat)
  tb4
  tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
  names(tb5)[1]<-"Categories"
  tb5[1]<-"Total missing persons"
  tb5
  tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
  tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  names(tb6)[1]<-"Categories"
  tb6
  
  # make a collected frame for the table
  tb1$type<-"Violent deaths"
  tb2$type<-"Violent deaths"
  tb2$type[2]<-"Nonviolent deaths"
  tb3$type<-"Violent deaths"
  tb4$type<-"Missing persons"
  tb5$type<-"Missing persons"
  tb6$type<-"Missing persons"
  
  #Groupframe<-data.frame(Categories="Violent deaths", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(tb1,tb2[1,],tb3)
  #Groupframe<-data.frame(Categories="Nonviolent deaths", onedummy="",se="",ci_l="",ci_u="")
  eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
  MF<-rbind(MF,tb2[2,],eframe)
  #Groupframe<-data.frame(Categories="Missing persons", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(MF,tb4, tb5,tb6)
  return(MF)
}
usedT<-cbind(makemaintableDF(trimmedrakedesign)[,1:3],data.frame(Raking=rep("1. Main model, full rake, trimmed weights",14)))
names(usedT)<-c("Categories","UsedE","UsedSE","Raking")
eqwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~simpleExpand)
eqd<-cbind(makemaintableDF(eqwdesign)[,1:3],data.frame(Raking=rep("7. Unadjusted equal probability",14)))
names(eqd)<-c("Categories","EQE","EQSE","Raking")
eqd
##                           Categories         EQE        EQSE
## 1                     Children (<18) 21137.00000 2850.000000
## 2                      Women (18-64) 21353.00000 2812.000000
## 3                      Elderly (65+)  1941.00000  637.000000
## 4                        Men (18-64) 40764.00000 3328.000000
## 11              Total violent deaths 85195.00000 6710.000000
## WEC     Women, elderly, and children    52.15190    2.886403
## 21           Total nonviolent deaths 14235.00000 1845.000000
## 12          Excess nonviolent deaths  6518.00000 1845.000000
## 13                    Children (<18)  3020.00000  828.000000
## 22                     Women (18-64)   216.00000  216.000000
## ...3                   Elderly (65+)     0.00000    0.000000
## 41                       Men (18-64) 10137.00000 1981.000000
## 14             Total missing persons 13372.00000 2431.000000
## WECmiss Women, elderly, and children    24.19355    5.166943
##                                  Raking
## 1       7. Unadjusted equal probability
## 2       7. Unadjusted equal probability
## 3       7. Unadjusted equal probability
## 4       7. Unadjusted equal probability
## 11      7. Unadjusted equal probability
## WEC     7. Unadjusted equal probability
## 21      7. Unadjusted equal probability
## 12      7. Unadjusted equal probability
## 13      7. Unadjusted equal probability
## 22      7. Unadjusted equal probability
## ...3    7. Unadjusted equal probability
## 41      7. Unadjusted equal probability
## 14      7. Unadjusted equal probability
## WECmiss 7. Unadjusted equal probability
# Make no Gaza 9 team design. The idea is to make it as if gaza9 never was part of the sample, i.e., not subsetting and retaining sample structure.
# It is debatable what is best.But the choice only affects standard errors.
nog9fates<-subset(fates,Interviewer != "gaza9")
nog9design<-rake(svydesign(data=nog9fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
noGaza9<-cbind(makemaintableDF(nog9design)[,1:3],data.frame(Raking=rep("2. Team 9 excluded",14)))   
names(noGaza9)<-c("Categories","noGE","noGSE","Raking")
# raked, no trim
noTrim<-cbind(makemaintableDF(rakedesign)[,1:3],data.frame(Raking=rep("4. Full rake, no weight trim",14)))
names(noTrim)<-c("Categories","noTrimE","noTrimSE", "Raking")
# No household adjustment
NoHHdesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, governoratemarginals))
NoHH<-cbind(makemaintableDF(NoHHdesign)[,1:3],data.frame(Raking=rep("6. No household size adjustment",14)))
names(NoHH)<-c("Categories","NOHHE","NOHH_SE","Raking")
NoHH
##                           Categories       NOHHE     NOHH_SE
## 1                     Children (<18) 25807.00000 3595.000000
## 2                      Women (18-64) 17664.00000 2297.000000
## 3                      Elderly (65+)  2802.00000  909.000000
## 4                        Men (18-64) 33650.00000 2742.000000
## 11              Total violent deaths 79924.00000 6640.000000
## WEC     Women, elderly, and children    57.89686    2.875111
## 21           Total nonviolent deaths 16233.00000 2021.000000
## 12          Excess nonviolent deaths  8516.00000 2021.000000
## 13                    Children (<18)  3959.00000 1123.000000
## 22                     Women (18-64)   194.00000  196.000000
## ...3                   Elderly (65+)     0.00000    0.000000
## 41                       Men (18-64)  8716.00000 1793.000000
## 14             Total missing persons 12869.00000 2521.000000
## WECmiss Women, elderly, and children    32.27426    6.065367
##                                  Raking
## 1       6. No household size adjustment
## 2       6. No household size adjustment
## 3       6. No household size adjustment
## 4       6. No household size adjustment
## 11      6. No household size adjustment
## WEC     6. No household size adjustment
## 21      6. No household size adjustment
## 12      6. No household size adjustment
## 13      6. No household size adjustment
## 22      6. No household size adjustment
## ...3    6. No household size adjustment
## 41      6. No household size adjustment
## 14      6. No household size adjustment
## WECmiss 6. No household size adjustment
#
# Household adjustment using 2017 HHsizes
HHPDesign<-rake(svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), 
                population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginals2017,governoratemarginals))
HH2017<-cbind(makemaintableDF(HHPDesign)[,1:3],data.frame(Raking=rep("3. Census 2017 household size adjustment",14)))   
names(HH2017)<-c("Categories","HH2017_E","HH2017_SE","Raking")

# Use PCBS Population
PCBSPop23<-2226544+sum(birthsex)
PCBSAdjust=PCBSPop23/USCBPopAtRisk
# Adjust the marginals
PCBSTotalAgeMarginal<-IDBTotalAgeMarginal %>% mutate(Freq=Freq*PCBSAdjust)

PCBSGenderMarginal<-IDBGenderMarginal%>% mutate(Freq=Freq*PCBSAdjust)
PCBShouseholdsizemarginalsMICS<-householdsizemarginalsMICS%>% mutate(Freq=Freq*PCBSAdjust)
PCBSgovernoratemarginal<-governoratemarginals%>% mutate(Freq=Freq*PCBSAdjust)
# Full raked PCBS design
PCBSrakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(PCBSTotalAgeMarginal, PCBSGenderMarginal, PCBShouseholdsizemarginalsMICS,PCBSgovernoratemarginal))
PCBSPop<-cbind(makemaintableDF(PCBSrakedesign)[,1:3],data.frame(Raking=rep("5. PCBS population, full rake",14)))
names(PCBSPop)<-c("Categories","PCBSE","PCBS_SE","Raking")

# bind together
senstableDF<-cbind(usedT[1:3],eqd[,2:3],noGaza9[,2:3],noTrim[,2:3],NoHH[2:3],HH2017[2:3],PCBSPop[,2:3])   # Omit raking
senstableDF$type<-c(rep("Violent deaths",5),"Nonviolent deaths %",rep("Nonviolent deaths",2),rep("Missing persons",5),"Missing persons %")
senstableDF
##                           Categories       UsedE      UsedSE         EQE
## 1                     Children (<18) 22771.00000 3087.000000 21137.00000
## 2                      Women (18-64) 16576.00000 2222.000000 21353.00000
## 3                      Elderly (65+)  2868.00000  943.000000  1941.00000
## 4                        Men (18-64) 32949.00000 2703.000000 40764.00000
## 11              Total violent deaths 75164.00000 5919.000000 85195.00000
## WEC     Women, elderly, and children    56.16376    2.940178    52.15190
## 21           Total nonviolent deaths 16253.00000 2039.000000 14235.00000
## 12          Excess nonviolent deaths  8536.00000 2039.000000  6518.00000
## 13                    Children (<18)  3610.00000 1040.000000  3020.00000
## 22                     Women (18-64)   131.00000  134.000000   216.00000
## ...3                   Elderly (65+)     0.00000    0.000000     0.00000
## 41                       Men (18-64)  8468.00000 1649.000000 10137.00000
## 14             Total missing persons 12209.00000 2198.000000 13372.00000
## WECmiss Women, elderly, and children    30.63832    6.395562    24.19355
##                EQSE        noGE       noGSE     noTrimE    noTrimSE       NOHHE
## 1       2850.000000 19165.00000 2809.000000 22795.00000 3080.000000 25807.00000
## 2       2812.000000 13170.00000 2093.000000 16139.00000 2171.000000 17664.00000
## 3        637.000000  3061.00000 1106.000000  3349.00000 1167.000000  2802.00000
## 4       3328.000000 28736.00000 2637.000000 32085.00000 2641.000000 33650.00000
## 11      6710.000000 64132.00000 5410.000000 74367.00000 5855.000000 79924.00000
## WEC        2.886403    55.19215    3.385384    56.85650    2.940619    57.89686
## 21      1845.000000 17507.00000 2226.000000 17315.00000 2227.000000 16233.00000
## 12      1845.000000  9790.00000 2226.000000  9598.00000 2227.000000  8516.00000
## 13       828.000000  3297.00000 1008.000000  3538.00000 1020.000000  3959.00000
## 22       216.000000   136.00000  139.000000   126.00000  129.000000   194.00000
## ...3       0.000000     0.00000    0.000000     0.00000    0.000000     0.00000
## 41      1981.000000  8362.00000 1712.000000  8232.00000 1604.000000  8716.00000
## 14      2431.000000 11796.00000 2272.000000 11896.00000 2140.000000 12869.00000
## WECmiss    5.166943    29.10800    6.358807    30.79429    6.429389    32.27426
##             NOHH_SE    HH2017_E   HH2017_SE       PCBSE     PCBS_SE
## 1       3595.000000 22367.00000 3045.000000 24138.00000 3261.000000
## 2       2297.000000 15992.00000 2141.000000 17090.00000 2298.000000
## 3        909.000000  3440.00000 1200.000000  3546.00000 1236.000000
## 4       2742.000000 32040.00000 2641.000000 33975.00000 2797.000000
## 11      6640.000000 73838.00000 5776.000000 78749.00000 6200.000000
## WEC        2.875111    56.60792    2.940507    56.85650    2.940619
## 21      2021.000000 17225.00000 2244.000000 18335.00000 2358.000000
## 12      2021.000000  9508.00000 2244.000000 10618.00000 2358.000000
## 13      1123.000000  3510.00000 1019.000000  3746.00000 1080.000000
## 22       196.000000   125.00000  129.000000   133.00000  137.000000
## ...3       0.000000     0.00000    0.000000     0.00000    0.000000
## 41      1793.000000  8103.00000 1563.000000  8717.00000 1698.000000
## 14      2521.000000 11737.00000 2087.000000 12596.00000 2266.000000
## WECmiss    6.065367    30.96632    6.552877    30.79429    6.429389
##                        type
## 1            Violent deaths
## 2            Violent deaths
## 3            Violent deaths
## 4            Violent deaths
## 11           Violent deaths
## WEC     Nonviolent deaths %
## 21        Nonviolent deaths
## 12        Nonviolent deaths
## 13          Missing persons
## 22          Missing persons
## ...3        Missing persons
## 41          Missing persons
## 14          Missing persons
## WECmiss   Missing persons %
senstab<-senstableDF %>% select(Categories,UsedE, UsedSE,noGE,noGSE,HH2017_E,HH2017_SE,noTrimE,noTrimSE,PCBSE,PCBS_SE,NOHHE,NOHH_SE,EQE,EQSE,type) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  opt_table_font(size=9) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = "Sensitivy of estimates to model assumptions") %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  tab_spanner(
    label = md("<br>Main model"),
    columns = c(UsedE,UsedSE)
  ) %>% 
  tab_spanner(
    label = md("Unadjusted<br>equal probability"),
    columns = c(EQE,EQSE)) %>% 
          tab_spanner(
            label = md("<br>Team 9 excluded"),
            columns = c(noGE,noGSE)) %>%
  tab_spanner(
    label = md("Full rake,<br>no weight trim"),
    columns = c(noTrimE,noTrimSE)) %>%
  tab_spanner(
    label = md("Census 2017<br>household size adjustment"),
    columns = c(HH2017_E,HH2017_SE)) %>%
  tab_spanner(
    label = md("No houshold<br>size adjustment"),
    columns = c(NOHHE,NOHH_SE)) %>%
  tab_spanner(
    label = md("PCBS population<br>Full rake"),
    columns = c(PCBSE,PCBS_SE)) %>%
  cols_label(UsedE="Estimate",
             UsedSE~"SE",
             EQE="Estimate",
             EQSE~"SE",
             noGE="Estimate",
             noGSE~"SE",
             noTrimE="Estimate",
             noTrimSE~"SE",
             NOHHE="Estimate",
             NOHH_SE~"SE",
             PCBSE="Estimate",
             PCBS_SE="SE",
             HH2017_E="Estimate",HH2017_SE="SE",
             .fn = md) %>%
  tab_footnote(
    locations = cells_column_labels(c(UsedSE)),
    footnote = "SE: Standard error")

senstab
Sensitivy of estimates to model assumptions
Categories

Main model

Team 9 excluded
Census 2017
household size adjustment
Full rake,
no weight trim
PCBS population
Full rake
No houshold
size adjustment
Unadjusted
equal probability
Estimate SE1 Estimate SE Estimate SE Estimate SE Estimate SE Estimate SE Estimate SE
Violent deaths Children (<18) 22,800 3,090 19,200 2,810 22,400 3,040 22,800 3,080 24,100 3,260 25,800 3,600 21,100 2,850
Women (18-64) 16,600 2,220 13,200 2,090 16,000 2,140 16,100 2,170 17,100 2,300 17,700 2,300 21,400 2,810
Elderly (65+) 2,870 943 3,060 1,110 3,440 1,200 3,350 1,170 3,550 1,240 2,800 909 1,940 637
Men (18-64) 32,900 2,700 28,700 2,640 32,000 2,640 32,100 2,640 34,000 2,800 33,600 2,740 40,800 3,330
Total violent deaths 75,200 5,920 64,100 5,410 73,800 5,780 74,400 5,860 78,700 6,200 79,900 6,640 85,200 6,710
Nonviolent deaths % Women, elderly, and children 56.2 2.94 55.2 3.39 56.6 2.94 56.9 2.94 56.9 2.94 57.9 2.88 52.2 2.89
Nonviolent deaths Total nonviolent deaths 16,300 2,040 17,500 2,230 17,200 2,240 17,300 2,230 18,300 2,360 16,200 2,020 14,200 1,840
Excess nonviolent deaths 8,540 2,040 9,790 2,230 9,510 2,240 9,600 2,230 10,600 2,360 8,520 2,020 6,520 1,840
Missing persons Children (<18) 3,610 1,040 3,300 1,010 3,510 1,020 3,540 1,020 3,750 1,080 3,960 1,120 3,020 828
Women (18-64) 131 134 136 139 125 129 126 129 133 137 194 196 216 216
Elderly (65+) 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Men (18-64) 8,470 1,650 8,360 1,710 8,100 1,560 8,230 1,600 8,720 1,700 8,720 1,790 10,100 1,980
Total missing persons 12,200 2,200 11,800 2,270 11,700 2,090 11,900 2,140 12,600 2,270 12,900 2,520 13,400 2,430
Missing persons % Women, elderly, and children 30.6 6.40 29.1 6.36 31.0 6.55 30.8 6.43 30.8 6.43 32.3 6.07 24.2 5.17
Based on GMS Household roster.sav
1 SE: Standard error
saveTable(senstab)

Plot errorbars

# First, make a dataset that works. Instead of using melt, cast and reshape esoterica, simply rbind the constituent sets.
names(usedT)[2:3]<-names(eqd)[2:3]<-names(noGaza9)[2:3]<-names(noTrim)[2:3]<-names(PCBSPop)[2:3]<-names(NoHH)[2:3]<-names(HH2017)[2:3]<-c("Estimate", "SE")
usedT$Type<-eqd$Type<-noGaza9$Type<-noTrim$Type<-PCBSPop$Type<-NoHH$Type<-HH2017$Type<-c(rep("Violent deaths",6),rep("Nonviolent deaths",2),rep("Missing persons",6))
longsensetab<-rbind(usedT,noGaza9,HH2017,noTrim,PCBSPop,NoHH,eqd)
dodge<-position_dodge(width=-0.9)   # creates warning in ggplot, but orders legend
gplotdata<-longsensetab %>%filter(!grepl("issing",Categories)) %>%
                         filter(!grepl(", and",Categories))
sensitivplot<-ggplot(gplotdata,aes(x=paste(Type,Categories,sep=" "),y=Estimate,group=Raking,color=Raking ))+
  geom_pointrange(aes(ymin=Estimate-1.96*SE,ymax=Estimate+1.96*SE),position=dodge)+
  xlab("")+ylab("Persons (point estimate and lower and upper confidence bounds)")+
  coord_flip()+
  theme_light()+ theme(legend.position="bottom")+guides(color=guide_legend(ncol=3))
  
sensitivplot
## Warning: `position_dodge()` requires non-overlapping x intervals.

pdf("Sensitivityplot.pdf", width=7.5,height=7,paper="letter")
print(sensitivplot)
## Warning: `position_dodge()` requires non-overlapping x intervals.
dev.off()
## png 
##   2

End of code