Change log April 22, 2025:

Changed to new household roster input file, and adapted governorate order to fit new file
Added births during the survey period as part of the population at risk
Changed code for death rates, with confidence intervals (now as a ratio estimate directly from data file insted of using aggregates)
Added sensitivity check for PCBS population size, variants of household size assumptions
Miscellaneous changes because of new variable names, wrong headings, etc
Added total number of women, elderly and children to main table

Introduction

Library loading and some utility functions

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.2.0
## ✔ purrr     0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(haven)
library(webshot2)
library(survey)

## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart

#library(expss)
library(gt)    # must be loaded after expss otherwise tables crash

agebounds<-function(cuts){
  # create typical age labels from a cut string
  out<-character()
  for (i in 1:(length(cuts)-1)) {
    lower<-cuts[i]+1
    if ((is.infinite(lower)) || (lower<0)) lower<-0
    upper<-cuts[i+1]
    if (lower<10) strlower<-paste(" ",lower,sep="") else strlower<-paste(lower)
    if (i<length(cuts)-1) {out<-append(out,paste(strlower,"-",upper,sep=''))}
     else {out<-append(out,paste(lower,"+",sep=''))}
  }
  return(out)
}

toString<-function(x,str) {
  # Stupid function to put in labels in tables (alternative to using factors or strings the whole way)
  # Replaces numbers with corresponding strings
  max<-length(x)
  out<-vector()
  for (i in 1:max) { 
    value<-x[i]
    replacement<-str[value]
    out<-append(out,replacement)   
  }
  return(out)
}

saveTable<-function(table) {
  # Saves a gt table in two formats - word and pdf
  fname<-deparse(match.call()$table)
  gtsave(table,paste(fname,".docx",sep=""))
  gtsave(table,paste(fname,".pdf",sep=""))
}

Output R version

R.version.string

## [1] "R version 4.2.1 (2022-06-23 ucrt)"

Age groups defintions

If one wants to have different age groups, this is the place to change - not in the actual age group/tabulation code The agebreaks are suitable to the cut function, and the agstr contains label strings for the age groups

agebreaks<-c(-1, 4, 11, 17,29, 39, 64, Inf)
agestr<-agebounds(agebreaks)

#Load data files
Load the roster file, the births file and the IDB population file
Note that since knitr is used, it is best to put the data in the default directory
In original IDB download: Changed 100+ to 100, changed variable names (done in excel)
https://www.census.gov/data-tools/demo/idb/#/pop?dashboard_page=country&COUNTRY_YR_ANIM=2025&CCODE=XG&popPages=BYAGE&menu=popViz&show_countries=n&CCODE_SINGLE=XG&subnat_map_admin=ADM1&POP_YEARS=2023,2024

maindata<-"GMS Household roster.sav"   # note this file has Khan Younis and Deir al Balah switched in order as in the questionnaire
birthsdata="GMS Births.sav"
s_govnames<-c("North Gaza"=1,"Gaza City"=2,"Deir al Balah"=3,"Khan Younis"=4,"Rafah"=5) 
fates1 <- 
  haven::read_sav(maindata)
# recode governorates to reduce my confusion about order
fates1<-fates1 %>% mutate(HR00=case_when(HR00==1~1,
                                      HR00==2~2,
                                      HR00==3~4,
                                      HR00==4~3,
                                      HR00==5~5))
#val_lab(fates1$HR00)<-s_govnames

# Births
births<- 
  haven::read_sav(birthsdata)
# recode governorates to reduce my confusion about order
births<-births %>% mutate(HR00=case_when(HR00==1~1,
                                       HR00==2~2,
                                       HR00==3~4,
                                       HR00==4~3,
                                       HR00==5~5))
#val_lab(births$HR00)<-s_govnames
numbirths=nrow(births)
numbirths

## [1] 357

birthsex<-table(births$HR04)

Population <- readxl::read_excel("Population Gaza Single year age groups IDB_2023.xlsx")
# From original IDB download: Changed 100+ to 100, changed variable names
# View(Population)
# Calculate adjustment to population at risk
# Needs to be done on all marginals
USCBPop23<-Population$Total[1]     # Total population
Persons.in.roster.file<-nrow(fates1)
AverageExpansion<-USCBPop23/Persons.in.roster.file
AverageExpansion

## [1] 215.6839

birthsex<-round(birthsex*AverageExpansion)
TotalAtRisk<-USCBPop23+sum(birthsex)     #using estimated births, rather than projected (due to uncertainty in projected due to conflict)
USCBPopAtRisk<-TotalAtRisk
USCBScaleFactor<-USCBPopAtRisk/USCBPop23
USCBScaleFactor

## [1] 1.036694

# The births will be distributed to boys and girls at 0 age.
Population$Total[1]<-TotalAtRisk
Population$Male[1]<-birthsex[1]+Population$Male[1]
Population$Female[2]<-birthsex[2]+Population$Female[2]
Population$Male[2]<-birthsex[1]+Population$Male[2]
Population$Female[1]<-birthsex[2]+Population$Female[1]

Age distributions

Census Bureau age distribution

Note that raking marginals for age and gender are also produced here.
Two sets of age/gender marginals. One is the traditional with separate age and gender, the other collects age and gender in one variable. The latter will force age and gender to match IDB exactly, but at the cost of bigger weight variation than with separate age and gender.

Population$AgeGroups<-as.integer(cut(as.numeric(Population$GROUP), breaks = agebreaks))   #Note change to 0 to 4
AgeGroups<-as.data.frame(group_by(Population,AgeGroups) %>% summarise(Males=sum(Male),
                                               Females=sum(Female)))

AgeGroups<-AgeGroups %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                .default=agestr[AgeGroups]),
                                AgeGroups=case_when(is.na(AgeGroups)~100,
                                                    .default=AgeGroups))
AgeGroups

##   AgeGroups   Males Females AgeGr
## 1         1  181716  173794   0-4
## 2         2  200191  188991  5-11
## 3         3  165430  157849 12-17
## 4         4  248133  240116 18-29
## 5         5  130574  133015 30-39
## 6         6  146967  149998 40-64
## 7         7   29944   28670   65+
## 8       100 1102955 1072433 Total

AgeGroups$PctFem<-AgeGroups$Females/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100
AgeGroups$PctMal<-AgeGroups$Males/(AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total'])*100

IDBAgeTable <- AgeGroups %>% select(AgeGr,Males,PctMal, Females,PctFem)%>% gt()%>%
  tab_header(
    title = "Gaza Strip population at risk 2023, broad age groups",
    subtitle = "based on the International Data Base, U..S Census Bureau"
      ) %>%   tab_source_note(
    source_note = "Percentages are based on the total population"
    ) %>% tab_source_note(
      source_note = "Births during survey period added"
    )  %>%   tab_source_note(
  source_note = "Source: https://www.census.gov/ International Programs Main/ Data /International Database."
  ) %>% 
  tab_source_note(
    source_note = "Downloaded 25.01.2025" 
  ) %>% 
  cols_label(
   AgeGr="Age", PctMal="Male percent", PctFem="Female percent" 
  ) %>% 
  fmt_number(n_sigfig = 3) %>%
  fmt_number(
    columns=c(PctMal,PctFem),
    decimals=1
  )

  IDBAgeTable

Age	Males	Male percent	Females	Female percent
Gaza Strip population at risk 2023, broad age groups
based on the International Data Base, U..S Census Bureau
0-4	182,000	8.4	174,000	8.0
5-11	200,000	9.2	189,000	8.7
12-17	165,000	7.6	158,000	7.3
18-29	248,000	11.4	240,000	11.0
30-39	131,000	6.0	133,000	6.1
40-64	147,000	6.8	150,000	6.9
65+	29,900	1.4	28,700	1.3
Total	1,100,000	50.7	1,070,000	49.3
Percentages are based on the total population
Births during survey period added
Source: https://www.census.gov/ International Programs Main/ Data /International Database.
Downloaded 25.01.2025

  saveTable(IDBAgeTable)
  #
  # Construct marginals based on the IDB table
  #
  IDBGenderMarginal<-data.frame(HR04=c(1,2), Freq=c(AgeGroups$Males[nrow(AgeGroups)],AgeGroups$Females[nrow(AgeGroups)]))  # IDB Gender 
  IDBGenderMarginal

##   HR04    Freq
## 1    1 1102955
## 2    2 1072433

  IDBTotalAge<-AgeGroups$Males[-nrow(AgeGroups)]+ AgeGroups$Females[-nrow(AgeGroups)]
  IDBTotalAgeMarginal<-data.frame(AgeGroups=seq(1,length(IDBTotalAge)),Freq=IDBTotalAge)
  IDBTotalAgeMarginal

##   AgeGroups   Freq
## 1         1 355510
## 2         2 389182
## 3         3 323279
## 4         4 488249
## 5         5 263589
## 6         6 296965
## 7         7  58614

  sum(IDBTotalAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true

## [1] TRUE

  # Also construct a marginal that is a blend of age and gender - to get that exact
  IDBMales<-data.frame(AgeSex=paste(agestr,"Male"),Freq=AgeGroups$Males[-nrow(AgeGroups)])
  IDBFemales<-data.frame(AgeSex=paste(agestr,"Female"),Freq=AgeGroups$Females[-nrow(AgeGroups)])
  IDBSeparateSexesAndAgeMarginal<-bind_rows(IDBMales,IDBFemales)
  IDBSeparateSexesAndAgeMarginal

##          AgeSex   Freq
## 1      0-4 Male 181716
## 2     5-11 Male 200191
## 3    12-17 Male 165430
## 4    18-29 Male 248133
## 5    30-39 Male 130574
## 6    40-64 Male 146967
## 7      65+ Male  29944
## 8    0-4 Female 173794
## 9   5-11 Female 188991
## 10 12-17 Female 157849
## 11 18-29 Female 240116
## 12 30-39 Female 133015
## 13 40-64 Female 149998
## 14   65+ Female  28670

  sum(IDBSeparateSexesAndAgeMarginal$Freq)==sum(IDBGenderMarginal$Freq)   # This should be true

## [1] TRUE

Age distribution in the survey data file

# add roster and birth together

fates<-rbind(fates1,births)  
fates$AgeGroups<-as.integer(cut(as.numeric(fates$HR03), breaks = agebreaks))
fates<-fates %>% mutate(AgeGr=case_when(is.na(AgeGroups)~"Total",
                                                     .default=agestr[AgeGroups]),
                                        Gender=case_when(HR04==1~'Male',
                                        HR04==2~'Female'))
fates$AgeSex<-paste(fates$AgeGr,fates$Gender,sep=" ")

RosterAge<-fates %>% group_by(AgeGr,Gender)  %>% summarize(sumpop=n())

## `summarise()` has grouped output by 'AgeGr'. You can override using the
## `.groups` argument.

RosterAge<-reshape(as.data.frame(RosterAge),idvar="AgeGr", timevar="Gender",direction="wide")
RosterAge

##    AgeGr sumpop.Female sumpop.Male
## 1    0-4           630         702
## 3   5-11           715         801
## 5  12-17           611         585
## 7  18-29          1416        1264
## 9  30-39           691         764
## 11 40-64           802         917
## 13   65+            87         101

names(RosterAge)<-c('AgeGr','FemaleRoster','MaleRoster')
RosterAge<-rbind(RosterAge,data.frame(AgeGr='Total',FemaleRoster=sum(RosterAge$FemaleRoster),MaleRoster=sum(RosterAge$MaleRoster)))
RosterAge$PctFemRoster<-RosterAge$FemaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge$PctMalRoster<-RosterAge$MaleRoster/(RosterAge$MaleRoster[RosterAge$AgeGr=='Total']+RosterAge$FemaleRoster[RosterAge$AgeGr=='Total'])*100
RosterAge

##    AgeGr FemaleRoster MaleRoster PctFemRoster PctMalRoster
## 1    0-4          630        702    6.2462820     6.960143
## 3   5-11          715        801    7.0890343     7.941701
## 5  12-17          611        585    6.0579020     5.800119
## 7  18-29         1416       1264   14.0392623    12.532223
## 9  30-39          691        764    6.8510807     7.574856
## 11 40-64          802        917    7.9516161     9.091810
## 13   65+           87        101    0.8625818     1.001388
## 12 Total         4952       5134   49.0977593    50.902241

SurveyAgeTable <- RosterAge %>% select(AgeGr,MaleRoster,PctMalRoster, FemaleRoster,PctFemRoster) %>% 
  gt()%>%
  tab_header(
    title = "Household members October 6, 2023 and births in survey period by broad age groups" ) %>% 
      tab_source_note(source_note = "Unweighted. Percentages are based on the total")  %>% 
      tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_label(
    AgeGr="Age",
    MaleRoster="Male",PctMalRoster="Percent male", FemaleRoster="Females",PctFemRoster="Percent female"
  ) %>% 
  fmt_number(
    columns=c(PctMalRoster,PctFemRoster),
    decimals=1
  )


SurveyAgeTable

Age	Male	Percent male	Females	Percent female
Household members October 6, 2023 and births in survey period by broad age groups
0-4	702	7.0	630	6.2
5-11	801	7.9	715	7.1
12-17	585	5.8	611	6.1
18-29	1264	12.5	1416	14.0
30-39	764	7.6	691	6.9
40-64	917	9.1	802	8.0
65+	101	1.0	87	0.9
Total	5134	50.9	4952	49.1
Unweighted. Percentages are based on the total
Based on GMS Household roster.sav

saveTable(SurveyAgeTable)

Construct variables, data frames and vectors needed for survey design definition

Construct governorate marginals

Since the adjusted weights also should reflect what we think is reality, not only the sampling, we use marginals adjusted to the IDB Census Bureau population.

Governorate	PCBS 2023	IDB adjusted	Questionnaire code	North South code	PSUs
North Gaza	444412	418833	1	1	36
Gaza City	749100	705983	2	2	72
Deir al Balah	319208	300835	4	3	28
Khan Younis	438557	413315	3	4	42
Rafah	275267	259423	5	5	22

The SPSS data file uses the questionnaire code, but that has been recoded into the north south code (above)
Note that the scale factor to account for new births is applied.

governoratemarginals<-data.frame(HR00=seq(1,5,1), Freq=c( 418833,705983, 300835,413315, 259423)*USCBScaleFactor )
governoratemarginals

##   HR00     Freq
## 1    1 434201.8
## 2    2 731888.6
## 3    3 311873.9
## 4    4 428481.3
## 5    5 268942.4

Construct household size marginals

Note that we have to scale to current population and since we are using the roster file we have to use marginals that are population in each size group There are two versions using household sizes from the 2017 census, and from the 2019-2020 MICS survey First the census version

householdsize2017 <-c(10661,31474,34084,43481,49305, 50239,42741,30693,19041,(10547 + 12444))
hhsizes<-c(seq(1,9,1),11.43151668)
popbyhouseholdsize2017<-householdsize2017*hhsizes
scalefactor2017to2023<-sum(IDBGenderMarginal$Freq)/sum(popbyhouseholdsize2017)
householdsizemarginals2017<-popbyhouseholdsize2017*scalefactor2017to2023
householdsizemarginals2017<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginals2017)
householdsizemarginals2017

##    hsize10      Freq
## 1        1  12357.99
## 2        2  72967.87
## 3        3 118528.16
## 4        4 201608.69
## 5        5 285766.10
## 6        6 349415.35
## 7        7 346810.68
## 8        8 284628.95
## 9        9 198647.00
## 10      10 304657.21

# Here from MICS 2019-2020 household file (Downloaded from https://mics.unicef.org/surveys)
#the initial data are persons in each household size group
hhsizemarginalsMICS<-c(110,
                          501,
                          1069,
                          1841,
                          2458,
                          3218,
                          3289,
                          2255,
                          1816,
                          2502)  # From 1 to 10+
MICSScalefactor<-sum(IDBGenderMarginal$Freq)/sum(hhsizemarginalsMICS)
householdsizemarginalsMICS<-hhsizemarginalsMICS*MICSScalefactor

householdsizemarginalsMICS<-data.frame(hsize10=seq(1,10,1),Freq=householdsizemarginalsMICS)
householdsizemarginalsMICS

##    hsize10      Freq
## 1        1  12555.36
## 2        2  57183.98
## 3        3 122015.31
## 4        4 210131.14
## 5        5 280555.31
## 6        6 367301.46
## 7        7 375405.38
## 8        8 257384.96
## 9        9 207277.64
## 10      10 285577.46

# Difference between distributions based on 2017 and MICS
round(householdsizemarginals2017$Freq-householdsizemarginalsMICS$Freq)

##  [1]   -197  15784  -3487  -8522   5211 -17886 -28595  27244  -8631  19080

sum(householdsizemarginals2017$Freq)-sum(householdsizemarginalsMICS$Freq)    # Should be 0

## [1] 0

Get household sizes into the data file

Two variables:
- hsize : the household size
- hsize10: the household size, but everything over 10 is set at 10. This is the one used to rake

fates$onedummy<-1   # simple counting variable that is also used initially for weights
pop23<-AgeGroups$Males[AgeGroups$AgeGr=='Total']+AgeGroups$Females[AgeGroups$AgeGr=='Total']
fates$simpleExpand<-pop23/nrow(fates)
hsiz=aggregate(fates$onedummy,list(fates$prikey),sum)
names(hsiz)<-c("prikey","hsize")
fates<-left_join(fates,hsiz,by="prikey")
fates$hsize10<-ifelse(fates$hsize<10,fates$hsize,10)

Some useful strings

s_mf<-c("Male", "Female")
s_status<-c("Resident","Left the Gaza Strip","Elsewhere in the Gaza Strip","Dead","Missing","Imprisoned" )
s_cause_dead_full<-c("Disease etc","Disease etc, no aid","Accident","Violent", "Not known")
s_Cause_dead_short<-c("Total violent deaths","Total nonviolent deaths")
s_governorates<-c("Northern Gaza","Gaza city","Deir al-Balah","Khan Younis","Rafah")
s_maincat<-c("Children (<18)","Women (18-64)","Elderly (65+)","Men (18-64)")
s_WEC<-"Women, elderly, and children"

Define the survey design

The marginals for raking are the following:

Age gender related (one must either use HR04 and AgeGroups or only AgeSex):
- AgeSex matches IDBSeparateSexesAndAgeMarginal
- AgeGroups matches IDBTotalAgeMarginal
- HR04 matches IDBGenderMarginal
Governorate:
- HR00 matches governoratemarginals
Household size (use one, not both)
- match hsize10 to householdsizemarginalsMICS, or
- match hsize10 to householdsizemarginals2017

Categories for reporting

Needs to be done before design definitions since design incorporates data

fates<- fates %>% mutate(Categories=case_when(HR07< 18 ~ 1,
                                              HR04==2 & HR07>17 & HR07<65 ~ 2,
                                              HR07>64 ~3,
                                              HR04==1 & HR07>17 & HR07<65 ~ 4),
                         WEC=case_when(is.na(HR07)~0,
                                             HR04==2 | (HR07<18 | HR07>64)~1,
                                             HR04==1 & (HR07>17 & HR07<65)~0),
                         WECmiss=case_when(HR04==2 | (HR03<18 | HR03>64)~1,
                                             HR04==1 & (HR03>17 & HR03<65)~0),
                         Allmiss=if_else(HR05==5,1,0),
                         CatMiss=case_when(HR03< 18 ~ 1,
                                              HR04==2 & HR03>17 & HR03<65 ~ 2,
                                              HR03>64 ~3,
                                              HR04==1 & HR03>17 & HR03<65 ~ 4),
                         typedeath=case_when(HR06==4~1,
                                             HR06<4 ~2),
                         Dead=if_else(HR05==4,1,0),
                         Violent=if_else(HR05==4& HR06==4,1,0),
                         Nonviolent=if_else(HR05==4 & HR06<4,1,0),
                         Missing=if_else(HR05==5,1,0))
fates<- fates %>% mutate(ratedenominator = case_when(HR01>50 & (HR05==4 | HR05==2)~0.25,
                                                     HR01>5~0.5,
                                                     HR05==1~1,
                                                     HR05==2~0.5,
                                                     HR05==3~1,
                                                     HR05==4~0.5,
                                                     HR05==5~1,
                                                     HR05==6~1))

The actual specification of the design

fates$stratum<-fates$HR00*10+fates$PSUType
uwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy)    # uw prefix means unweighted
tbs<-svytable(~HR05+HR04,uwdesign)
ftable(tbs)

##      HR04    1    2
## HR05               
## 1         4520 4569
## 2          140  145
## 3           81   56
## 4          285  176
## 5           57    5
## 6           51    1

#  The following should match the svytable (simple unweighted table)
xtabs(~HR05+HR04,fates)

##     HR04
## HR05    1    2
##    1 4520 4569
##    2  140  145
##    3   81   56
##    4  285  176
##    5   57    5
##    6   51    1

# 
# Full raked design
rakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
svytable(~HR04+HR05,rakedesign)

##     HR05
## HR04           1           2           3           4           5           6
##    1 983616.6154  27747.0024  14841.8167  56698.8796  10783.9508   9266.5554
##    2 995888.0906  28521.4894  11775.7981  34983.9959   1111.6395    152.1662

svyby(~onedummy,~HR04,design=rakedesign,svytotal)

##   HR04 onedummy        se
## 1    1  1102955 0.0661335
## 2    2  1072433 0.0661335

#
# Check weights
#
weights<-weights(rakedesign)
boxplot(weights)

summary(weights)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   99.61  144.03  197.99  215.68  256.26  721.09

# This is a bit high variation
# so let us fix that
trimmedrakedesign<-trimWeights(rakedesign, upper=median(weights*2), lower=90)
weights<-weights(trimmedrakedesign)
boxplot(weights)

summary(weights)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   104.8   149.2   203.2   215.7   261.4   400.9

# Export data file
GMS_Used_data<-model.frame(trimmedrakedesign)
GMS_Used_data<-cbind(GMS_Used_data,weights)
saveRDS(GMS_Used_data,file="GMS_at_risk_apr_18_2025.rds")
rm(GMS_Used_data)

It is now possible to define other designs, and then use them as the current design

currentdesign<-trimmedrakedesign

tbl<-round(svyby(~onedummy,~HR05+~HR04,design=currentdesign,svytotal,vartype=c("se","ci")))
tbl

##     HR05 HR04 onedummy   se   ci_l    ci_u
## 1.1    1    1   982822 7004 969095  996550
## 2.1    2    1    28285 3118  22175   34396
## 3.1    3    1    15102 2256  10680   19523
## 4.1    4    1    56705 3995  48874   64536
## 5.1    5    1    11071 1983   7186   14957
## 6.1    6    1     9422 1670   6149   12696
## 1.2    1    2   995139 6370 982653 1007624
## 2.2    2    2    28911 4082  20912   36911
## 3.2    3    2    11923 2158   7694   16152
## 4.2    4    2    34712 3751  27361   42063
## 5.2    5    2     1137  559     42    2233
## 6.2    6    2      157  160   -156     470

tbl$HR05<-toString(tbl$HR05,s_status)
tbl$HR04<-toString(tbl$HR04,s_mf)
names(tbl)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tbl

##                          Status Gender Estimate   se   ci_l    ci_u
## 1.1                    Resident   Male   982822 7004 969095  996550
## 2.1         Left the Gaza Strip   Male    28285 3118  22175   34396
## 3.1 Elsewhere in the Gaza Strip   Male    15102 2256  10680   19523
## 4.1                        Dead   Male    56705 3995  48874   64536
## 5.1                     Missing   Male    11071 1983   7186   14957
## 6.1                  Imprisoned   Male     9422 1670   6149   12696
## 1.2                    Resident Female   995139 6370 982653 1007624
## 2.2         Left the Gaza Strip Female    28911 4082  20912   36911
## 3.2 Elsewhere in the Gaza Strip Female    11923 2158   7694   16152
## 4.2                        Dead Female    34712 3751  27361   42063
## 5.2                     Missing Female     1137  559     42    2233
## 6.2                  Imprisoned Female      157  160   -156     470

tblT<-round(svyby(~onedummy,~HR05,design=currentdesign,svytotal,vartype=c("se","ci")))

tblT$HR05<-toString(tblT$HR05,s_status)
tblT$Gender<-rep("Total",length(s_status))
tblT<-tblT[,c("HR05","Gender","onedummy","se","ci_l","ci_u")]
tblT

##                          HR05 Gender onedummy    se    ci_l    ci_u
## 1                    Resident  Total  1977961 11502 1955418 2000505
## 2         Left the Gaza Strip  Total    57197  6017   45404   68989
## 3 Elsewhere in the Gaza Strip  Total    27024  3744   19687   34362
## 4                        Dead  Total    91417  6447   78782  104052
## 5                     Missing  Total    12209  2198    7901   16516
## 6                  Imprisoned  Total     9580  1692    6264   12895

names(tblT)<-c("Status","Gender", "Estimate","se","ci_l","ci_u")
tblT

##                        Status Gender Estimate    se    ci_l    ci_u
## 1                    Resident  Total  1977961 11502 1955418 2000505
## 2         Left the Gaza Strip  Total    57197  6017   45404   68989
## 3 Elsewhere in the Gaza Strip  Total    27024  3744   19687   34362
## 4                        Dead  Total    91417  6447   78782  104052
## 5                     Missing  Total    12209  2198    7901   16516
## 6                  Imprisoned  Total     9580  1692    6264   12895

tblT<-bind_rows(tbl,tblT)
mainoutcomestable<-tblT%>% 
  group_by(Gender) %>%
  gt(row_group_as_column = TRUE) %>%
  tab_header(
    title = "Gaza Strip Survey main status outcomes") %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size") %>%
  tab_source_note(source_note = paste("Based on Gaza Mortality Survey ",maindata))  %>% 
    cols_merge(
       columns=c(ci_l,ci_u),
       pattern="({1}, {2})")%>%
    cols_label(se~"Standard<br>error",
               ci_l~"Confidence<br>Interval",
               .fn = md) %>% 
     fmt_number(n_sigfig = 3)
mainoutcomestable

	Status	Estimate	Standard error	Confidence Interval
Gaza Strip Survey main status outcomes
Male	Resident	983,000	7,000	(969,000, 997,000)
	Left the Gaza Strip	28,300	3,120	(22,200, 34,400)
	Elsewhere in the Gaza Strip	15,100	2,260	(10,700, 19,500)
	Dead	56,700	4,000	(48,900, 64,500)
	Missing	11,100	1,980	(7,190, 15,000)
	Imprisoned	9,420	1,670	(6,150, 12,700)
Female	Resident	995,000	6,370	(983,000, 1,010,000)
	Left the Gaza Strip	28,900	4,080	(20,900, 36,900)
	Elsewhere in the Gaza Strip	11,900	2,160	(7,690, 16,200)
	Dead	34,700	3,750	(27,400, 42,100)
	Missing	1,140	559	(42.0, 2,230)
	Imprisoned	157	160	(−156, 470)
Total	Resident	1,980,000	11,500	(1,960,000, 2,000,000)
	Left the Gaza Strip	57,200	6,020	(45,400, 69,000)
	Elsewhere in the Gaza Strip	27,000	3,740	(19,700, 34,400)
	Dead	91,400	6,450	(78,800, 104,000)
	Missing	12,200	2,200	(7,900, 16,500)
	Imprisoned	9,580	1,690	(6,260, 12,900)
Raked to age,gender, governorate, and household size
Based on Gaza Mortality Survey GMS Household roster.sav

saveTable(mainoutcomestable)

Main table of deaths

Baseline deaths are IDB estimated deaths 2023 6348, 2024 6188,2025 6043 87 days in 2023
#https://www.census.gov/data-tools/demo/idb/#/table?dashboard_page=sources&COUNTRY_YR_ANIM=2023&CCODE_SINGLE=XG&subnat_map_admin=ADM1&CCODE=XG&COUNTRY_YEAR=2023&menu=tableViz&TABLE_YEARS=2023,2024,2025&TABLE_USE_RANGE=N&TABLE_USE_YEARS=Y&TABLE_STEP=1&TABLE_ADD_YEARS=2023,2024,2025&quickReports=CUSTOM&CUSTOM_COLS=CDR,DEATHS
Downloaded 8.2.2025
Median day of fieldwork is Jan 2, 2025

end <- as.Date("2023-12-31")
start <- strptime("2023-10-06", format="%Y-%m-%d", tz="UTC")
DaysIn2023<-as.numeric(difftime(as.POSIXct(end), as.POSIXct(start, tz="UTC"), units="days"))
DaysIn2025=2
baseline<-6348*DaysIn2023/365+6188+6043*DaysIn2025/365    # might be considered slightly misplaced concreteness
# Define a function to make it easier to repeat
#### Makemaintable function ####
makemaintable<-function(currentdesign,designdescription) {

    #First select violent deaths - HR06==4
    tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
    tb1$Categories<-toString(tb1$Categories,s_maincat)
    tbwec<- svyby(~onedummy,~WEC,design=subset(currentdesign,HR06==4 & WEC==1),svytotal,vartype=c("se","ci"))
    names(tbwec)[1]<-"Categories"  
    tbwec$Categories<-s_WEC
    tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
    names(tb2)[1]<-"Categories"
    tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
    tb2
    
    tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
    tb3<-data.frame(Categories=paste(s_WEC," (%)"),onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    tb3
    tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
    names(tb4)[1]<-"Categories"
    tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
    tb4$Categories<-toString(tb4$Categories,s_maincat)
    tb4
    tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
    names(tb5)[1]<-"Categories"
    tb5[1]<-"Total missing persons"
    tb5
    tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
    tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
    tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
    names(tb6)[1]<-"Categories"
    tb6
    
    # make a collected frame for the table
    tb1$type<-"Violent deaths"
    tb2$type<-"Violent deaths"
    tb2$type[2]<-"Nonviolent deaths"
    tb3$type<-"Violent deaths"
    tb4$type<-"Missing persons"
    tb5$type<-"Missing persons"
    tb6$type<-"Missing persons"
    tbwec$type<-"Violent deaths"
    #
    MF<-rbind(tb1,tbwec,tb2[1,],tb3)
    
    eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
    MF<-rbind(MF,tb2[2,],eframe)
    
    MF<-rbind(MF,tb4, tb5,tb6)
    MF
    row_number(MF)
    return(MF%>% select(type,Categories,onedummy,se,ci_l,ci_u) %>%
      group_by(type) %>%
      gt(row_group_as_column = TRUE) %>%
      fmt_number(n_sigfig = 3)%>%
      tab_header(
        title = md("Estimates of violent, nonviolent mortality and missing persons<br>by broad age and gender groups")) %>%
        tab_source_note(source_note =designdescription ) %>%
        tab_source_note(source_note = paste("Based on ",maindata))  %>% 
      cols_merge(
        columns=c(ci_l,ci_u),
        pattern="({1}, {2})")%>%
      cols_label(se~"Standard<br>error",
                 ci_l~"Confidence<br>Interval",
                 onedummy="Estimate",
                 .fn = md)
      )

}

trimraketable<-makemaintable(trimmedrakedesign,"Raked to age,gender, governorate, and household size")
trimraketable

	Categories	Estimate	Standard error	Confidence Interval
Estimates of violent, nonviolent mortality and missing persons by broad age and gender groups
Violent deaths	Children (<18)	22,800	3,090	(16,700, 28,800)
	Women (18-64)	16,600	2,220	(12,200, 20,900)
	Elderly (65+)	2,870	943	(1,020, 4,720)
	Men (18-64)	32,900	2,700	(27,600, 38,200)
	Women, elderly, and children	42,200	4,630	(33,100, 51,300)
	Total violent deaths	75,200	5,920	(63,600, 86,800)
	Women, elderly, and children (%)	56.2	2.94	(50.4, 61.9)
Nonviolent deaths	Total nonviolent deaths	16,300	2,040	(12,300, 20,200)
Nonviolent deaths	Excess nonviolent deaths	8,540	2,040	(4,540, 12,500)
Missing persons	Children (<18)	3,610	1,040	(1,570, 5,650)
	Women (18-64)	131	134	(−133, 394)
	Elderly (65+)	0	0	(0, 0)
	Men (18-64)	8,470	1,650	(5,240, 11,700)
	Total missing persons	12,200	2,200	(7,900, 16,500)
	Women, elderly, and children	30.6	6.40	(18.1, 43.2)
Raked to age,gender, governorate, and household size
Based on GMS Household roster.sav

saveTable(trimraketable)

Table of violent and nonviolent deaths by governorate

## Make a table of deaths per governorate
tb1<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))  # all deaths by governorate
tb1$type<-"Total"
names(tb1)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb1$Governorate<-toString(tb1$Governorate,s_governorates)
tb1

##     Governorate Estimate   se  ci_l  ci_u  type
## 1 Northern Gaza    15964 3232  9630 22299 Total
## 2     Gaza city    35661 4673 26502 44820 Total
## 3 Deir al-Balah    12375 1894  8663 16087 Total
## 4   Khan Younis    18197 2468 13360 23035 Total
## 5         Rafah     9219 1816  5660 12779 Total

tb2<-round(ftable(svytotal(~onedummy,design=subset(currentdesign,HR05==4,vartype=c("se","ci"))) ))     #total deaths, all governorates
tb2ci<-ftable(round(confint(svytotal(~onedummy,design=subset(currentdesign,HR05==4) ))))
tb2<-data.frame(Governorate="Total",Estimate=tb2[1,1],se=tb2[1,2],ci_l=tb2ci[1,1],ci_l=tb2ci[1,2],type="Total")
names(tb2)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb2[1,1]<-"Total"
tb2

##   Governorate Estimate   se  ci_l   ci_u  type
## 1       Total    91417 6447 78782 104052 Total

tb3<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # Total violent / nonviolent
tb3$type=c("Violent deaths","Nonviolent deaths")
names(tb3)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb3$Governorate<-"Total"
tb3

##   Governorate Estimate   se  ci_l  ci_u              type
## 1       Total    75164 5919 63563 86766    Violent deaths
## 2       Total    16253 2039 12257 20249 Nonviolent deaths

tb4<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==1),svytotal,vartype=c("se","ci")))   # violent 
tb4$type<-"Violent deaths"
names(tb4)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb4$Governorate<-toString(tb4$Governorate,s_governorates)
tb4

##     Governorate Estimate   se  ci_l  ci_u           type
## 1 Northern Gaza    13201 2848  7619 18783 Violent deaths
## 2     Gaza city    31291 4547 22379 40203 Violent deaths
## 3 Deir al-Balah     8276 1424  5486 11066 Violent deaths
## 4   Khan Younis    15065 2155 10842 19288 Violent deaths
## 5         Rafah     7330 1629  4137 10524 Violent deaths

tb5<-round(svyby(~onedummy,~HR00,design=subset(currentdesign,typedeath==2),svytotal,vartype=c("se","ci")))   # Nonviolent 
tb5$type<-"Nonviolent deaths"
names(tb5)<-c("Governorate","Estimate","se","ci_l","ci_u","type")
tb5$Governorate<-toString(tb5$Governorate,s_governorates)
tb5

##     Governorate Estimate   se ci_l ci_u              type
## 1 Northern Gaza     2763  821 1154 4372 Nonviolent deaths
## 2     Gaza city     4370  975 2460 6280 Nonviolent deaths
## 3 Deir al-Balah     4099 1173 1800 6397 Nonviolent deaths
## 4   Khan Younis     3132  890 1387 4878 Nonviolent deaths
## 5         Rafah     1889  704  509 3269 Nonviolent deaths

# collect t he governorate table
govtab<-rbind(tb1,tb2,tb4,tb3[1,],tb5,tb3[2,])
governoratetable<-govtab %>% select(type,Governorate,Estimate,se,ci_l,ci_u) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Estimates of violent, nonviolent mortality<br>by governorate of residence on October 6, 2023")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  cols_merge(
    columns=c(ci_l,ci_u),
    pattern="({1}, {2})")%>%
  cols_label(se~"Standard<br>error",
             ci_l~"Confidence<br>Interval",
             .fn = md)
governoratetable

	Governorate	Estimate	Standard error	Confidence Interval
Estimates of violent, nonviolent mortality by governorate of residence on October 6, 2023
Total	Northern Gaza	16,000	3,230	(9,630, 22,300)
	Gaza city	35,700	4,670	(26,500, 44,800)
	Deir al-Balah	12,400	1,890	(8,660, 16,100)
	Khan Younis	18,200	2,470	(13,400, 23,000)
	Rafah	9,220	1,820	(5,660, 12,800)
	Total	91,400	6,450	(78,800, 104,000)
Violent deaths	Northern Gaza	13,200	2,850	(7,620, 18,800)
	Gaza city	31,300	4,550	(22,400, 40,200)
	Deir al-Balah	8,280	1,420	(5,490, 11,100)
	Khan Younis	15,100	2,160	(10,800, 19,300)
	Rafah	7,330	1,630	(4,140, 10,500)
	Total	75,200	5,920	(63,600, 86,800)
Nonviolent deaths	Northern Gaza	2,760	821	(1,150, 4,370)
	Gaza city	4,370	975	(2,460, 6,280)
	Deir al-Balah	4,100	1,170	(1,800, 6,400)
	Khan Younis	3,130	890	(1,390, 4,880)
	Rafah	1,890	704	(509, 3,270)
	Total	16,300	2,040	(12,300, 20,200)
Raked to age,gender, governorate, and household size
Based on GMS Household roster.sav

saveTable(governoratetable)

Crude death rates

Calculate crude death rate, changed from initial version and added confidence intervals. Also added estimate of rates for the whole period, and the annualized rate. The annualized rate is the yearly rate that, if valid, throughout the survey period, would result approximately in the total number of deaths observed. The approximation occurs because the calculation depends on simplifying assumptions about exposure. Note that the Crude death rate calculated here is different from the probability of dying

ViolentCDR<-svyratio(~Violent,~ratedenominator,design=currentdesign)
NonviolentCDR<-svyratio(~Nonviolent,~ratedenominator,design=currentdesign)
TotalCDR<-svyratio(~Dead,~ratedenominator,design=currentdesign)
ViolentCDRCI<-confint(svyratio(~Violent,~ratedenominator,design=currentdesign))
NonviolentCDRCI<-confint(svyratio(~Nonviolent,~ratedenominator,design=currentdesign))
TotalCDRCI<-confint(svyratio(~Dead,~ratedenominator,design=currentdesign))



tabRatesA<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000*12/15,
                    SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000*12/5,
                    l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000*12/15,
                    u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000*12/15)

AnnualizedCDRTable<-tabRatesA %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Annualized mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
  cols_merge(
    columns=c(l_ci,u_ci),
    pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)
AnnualizedCDRTable

Type	CDR	Standard error	Confidence Interval
Annualized mortality rates for the Gaza Strip October 6, 2023 to January 5, 2025
Violent	33.1	8.01	(27.9, 38.4)
Nonviolent	7.16	2.70	(5.40, 8.93)
Total	40.3	8.74	(34.6, 46.0)
Raked to age,gender, governorate, and household size
Based on household roster and births and IDB US Census Bureau population estimates

saveTable(AnnualizedCDRTable)
#
tabRates<-data.frame(Type=c("Violent","Nonviolent","Total"),CDR=c(ViolentCDR$ratio[1,1],NonviolentCDR$ratio[1,1],TotalCDR$ratio[1,1])*1000,
                     SE=sqrt(c(ViolentCDR$var[1,1],NonviolentCDR$var[1,1],TotalCDR$var[1,1]))*1000,
                     l_ci=c(ViolentCDRCI[1,1],NonviolentCDRCI[1],TotalCDRCI[1])*1000,
                     u_ci=c(ViolentCDRCI[1,2],NonviolentCDRCI[2],TotalCDRCI[2])*1000)
CDRTable<-tabRates %>% gt() %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = md("Mortality rates for the Gaza Strip<br>October 6, 2023 to January 5, 2025")) %>%
  tab_source_note(source_note ="Raked to age,gender, governorate, and household size" ) %>%
  tab_source_note(source_note = md(paste("Based on household roster and births <br>and IDB US Census Bureau population estimates"))) %>%
cols_merge(
  columns=c(l_ci,u_ci),
  pattern="({1}, {2})")%>%
  cols_label(SE~"Standard<br>error",
             l_ci~"Confidence<br>Interval",
             .fn = md)

CDRTable

Type	CDR	Standard error	Confidence Interval
Mortality rates for the Gaza Strip October 6, 2023 to January 5, 2025
Violent	41.4	3.34	(34.9, 48.0)
Nonviolent	8.95	1.13	(6.75, 11.2)
Total	50.4	3.64	(43.2, 57.5)
Raked to age,gender, governorate, and household size
Based on household roster and births and IDB US Census Bureau population estimates

saveTable(CDRTable)

In comparison Jammaludine et al writes: “We estimated an annualised rate of deaths from traumatic injury of 39·3 per 1000 people (95% CI 35·7–49·4) or 1·1 per 10 000 person-days. In comparison, the crude death rate from all causes in 2022 was 2·8 per 1 000 people,28 yielding a rate ratio of 14·0 (95% CI 12·8–17·6).

Different version of CDR to get confidence intervals.
Strategy is to count deaths as 1, and exposure as
1 if living (not dead, in prison, missing, moved within the Gaza Strip, not known) 0.5 if dead (assuming half exposure time) 0.5 migrated (again assuming half exposure time) 0.5 if child born during the period (again, half exposure time). 0.25 if child is born during period and migrated Then rate=sum(deathsw)/sum(exposurew), SE can be obtained with linearization. annualized rate is rate12/5 standard error of annualized rate is SE(rate)12/15 (because var(ek)=var(e)k^2, and SE = sqrt(var)

Sensitivity

The basic idea is to run the main table with different rakings, and also with the exclusion of team 9.

makemaintableDF<-function(currentdesign) {
  
  #First select violent deaths - HR06==4
  tb1<-round(svyby(~onedummy,~Categories,design=subset(currentdesign,HR06==4),svytotal,vartype=c("se","ci")))  #age and gender
  tb1$Categories<-toString(tb1$Categories,s_maincat)
  tb1    
  tb2<-round(svyby(~onedummy,~typedeath,design=subset(currentdesign,HR05==4),svytotal,vartype=c("se","ci")))   # violent / nonviolent
  names(tb2)[1]<-"Categories"
  tb2$Categories<-toString(tb2$Categories,s_Cause_dead_short)
  tb2
  tbp1<-svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WEC,design=subset(currentdesign,HR06==4),vartype=c("se","ci")))
  tb3<-data.frame(Categories=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  tb3
  tb4<-round(svyby(~onedummy,~CatMiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))  #missing age and gender
  names(tb4)[1]<-"Categories"
  tb4<-add_row(tb4,Categories=3,onedummy=0, se=0,ci_l=0,ci_u=0,.before=3)
  tb4$Categories<-toString(tb4$Categories,s_maincat)
  tb4
  tb5<-round(svyby(~onedummy,~Allmiss,design=subset(currentdesign,HR05==5),svytotal,vartype=c("se","ci")))   # missing
  names(tb5)[1]<-"Categories"
  tb5[1]<-"Total missing persons"
  tb5
  tbp1<-svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci"))   #Women,children, elderly, percent
  tbp2<-confint(svymean(~WECmiss,design=subset(currentdesign,HR05==5),vartype=c("se","ci")))
  tb6<-data.frame(CatMiss=s_WEC,onedummy=tbp1[1]*100,se=ftable(tbp1)[2]*100,ci_l=tbp2[1]*100,ci_u=tbp2[2]*100)
  names(tb6)[1]<-"Categories"
  tb6
  
  # make a collected frame for the table
  tb1$type<-"Violent deaths"
  tb2$type<-"Violent deaths"
  tb2$type[2]<-"Nonviolent deaths"
  tb3$type<-"Violent deaths"
  tb4$type<-"Missing persons"
  tb5$type<-"Missing persons"
  tb6$type<-"Missing persons"
  
  #Groupframe<-data.frame(Categories="Violent deaths", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(tb1,tb2[1,],tb3)
  #Groupframe<-data.frame(Categories="Nonviolent deaths", onedummy="",se="",ci_l="",ci_u="")
  eframe<-data.frame(Categories="Excess nonviolent deaths", onedummy=round(tb2[2,2]-baseline),se=round(tb2[2,3]),ci_l=round(tb2[2,4]-baseline),ci_u=round(tb2[2,5]-baseline),type="Nonviolent deaths")
  MF<-rbind(MF,tb2[2,],eframe)
  #Groupframe<-data.frame(Categories="Missing persons", onedummy="",se="",ci_l="",ci_u="")
  MF<-rbind(MF,tb4, tb5,tb6)
  return(MF)
}
usedT<-cbind(makemaintableDF(trimmedrakedesign)[,1:3],data.frame(Raking=rep("1. Main model, full rake, trimmed weights",14)))
names(usedT)<-c("Categories","UsedE","UsedSE","Raking")
eqwdesign<-svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~simpleExpand)
eqd<-cbind(makemaintableDF(eqwdesign)[,1:3],data.frame(Raking=rep("7. Unadjusted equal probability",14)))
names(eqd)<-c("Categories","EQE","EQSE","Raking")
eqd

##                           Categories         EQE        EQSE
## 1                     Children (<18) 21137.00000 2850.000000
## 2                      Women (18-64) 21353.00000 2812.000000
## 3                      Elderly (65+)  1941.00000  637.000000
## 4                        Men (18-64) 40764.00000 3328.000000
## 11              Total violent deaths 85195.00000 6710.000000
## WEC     Women, elderly, and children    52.15190    2.886403
## 21           Total nonviolent deaths 14235.00000 1845.000000
## 12          Excess nonviolent deaths  6518.00000 1845.000000
## 13                    Children (<18)  3020.00000  828.000000
## 22                     Women (18-64)   216.00000  216.000000
## ...3                   Elderly (65+)     0.00000    0.000000
## 41                       Men (18-64) 10137.00000 1981.000000
## 14             Total missing persons 13372.00000 2431.000000
## WECmiss Women, elderly, and children    24.19355    5.166943
##                                  Raking
## 1       7. Unadjusted equal probability
## 2       7. Unadjusted equal probability
## 3       7. Unadjusted equal probability
## 4       7. Unadjusted equal probability
## 11      7. Unadjusted equal probability
## WEC     7. Unadjusted equal probability
## 21      7. Unadjusted equal probability
## 12      7. Unadjusted equal probability
## 13      7. Unadjusted equal probability
## 22      7. Unadjusted equal probability
## ...3    7. Unadjusted equal probability
## 41      7. Unadjusted equal probability
## 14      7. Unadjusted equal probability
## WECmiss 7. Unadjusted equal probability

# Make no Gaza 9 team design. The idea is to make it as if gaza9 never was part of the sample, i.e., not subsetting and retaining sample structure.
# It is debatable what is best.But the choice only affects standard errors.
nog9fates<-subset(fates,Interviewer != "gaza9")
nog9design<-rake(svydesign(data=nog9fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginalsMICS,governoratemarginals))
noGaza9<-cbind(makemaintableDF(nog9design)[,1:3],data.frame(Raking=rep("2. Team 9 excluded",14)))   
names(noGaza9)<-c("Categories","noGE","noGSE","Raking")
# raked, no trim
noTrim<-cbind(makemaintableDF(rakedesign)[,1:3],data.frame(Raking=rep("4. Full rake, no weight trim",14)))
names(noTrim)<-c("Categories","noTrimE","noTrimSE", "Raking")
# No household adjustment
NoHHdesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~HR00), population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, governoratemarginals))
NoHH<-cbind(makemaintableDF(NoHHdesign)[,1:3],data.frame(Raking=rep("6. No household size adjustment",14)))
names(NoHH)<-c("Categories","NOHHE","NOHH_SE","Raking")
NoHH

##                           Categories       NOHHE     NOHH_SE
## 1                     Children (<18) 25807.00000 3595.000000
## 2                      Women (18-64) 17664.00000 2297.000000
## 3                      Elderly (65+)  2802.00000  909.000000
## 4                        Men (18-64) 33650.00000 2742.000000
## 11              Total violent deaths 79924.00000 6640.000000
## WEC     Women, elderly, and children    57.89686    2.875111
## 21           Total nonviolent deaths 16233.00000 2021.000000
## 12          Excess nonviolent deaths  8516.00000 2021.000000
## 13                    Children (<18)  3959.00000 1123.000000
## 22                     Women (18-64)   194.00000  196.000000
## ...3                   Elderly (65+)     0.00000    0.000000
## 41                       Men (18-64)  8716.00000 1793.000000
## 14             Total missing persons 12869.00000 2521.000000
## WECmiss Women, elderly, and children    32.27426    6.065367
##                                  Raking
## 1       6. No household size adjustment
## 2       6. No household size adjustment
## 3       6. No household size adjustment
## 4       6. No household size adjustment
## 11      6. No household size adjustment
## WEC     6. No household size adjustment
## 21      6. No household size adjustment
## 12      6. No household size adjustment
## 13      6. No household size adjustment
## 22      6. No household size adjustment
## ...3    6. No household size adjustment
## 41      6. No household size adjustment
## 14      6. No household size adjustment
## WECmiss 6. No household size adjustment

#
# Household adjustment using 2017 HHsizes
HHPDesign<-rake(svydesign(data=fates,id=~PSUID,strata=~stratum, weights=~onedummy),sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), 
                population.margins=list(IDBTotalAgeMarginal, IDBGenderMarginal, householdsizemarginals2017,governoratemarginals))
HH2017<-cbind(makemaintableDF(HHPDesign)[,1:3],data.frame(Raking=rep("3. Census 2017 household size adjustment",14)))   
names(HH2017)<-c("Categories","HH2017_E","HH2017_SE","Raking")

# Use PCBS Population
PCBSPop23<-2226544+sum(birthsex)
PCBSAdjust=PCBSPop23/USCBPopAtRisk
# Adjust the marginals
PCBSTotalAgeMarginal<-IDBTotalAgeMarginal %>% mutate(Freq=Freq*PCBSAdjust)

PCBSGenderMarginal<-IDBGenderMarginal%>% mutate(Freq=Freq*PCBSAdjust)
PCBShouseholdsizemarginalsMICS<-householdsizemarginalsMICS%>% mutate(Freq=Freq*PCBSAdjust)
PCBSgovernoratemarginal<-governoratemarginals%>% mutate(Freq=Freq*PCBSAdjust)
# Full raked PCBS design
PCBSrakedesign<-rake(uwdesign,sample.margins=list( ~AgeGroups,~HR04,~hsize10,~HR00), population.margins=list(PCBSTotalAgeMarginal, PCBSGenderMarginal, PCBShouseholdsizemarginalsMICS,PCBSgovernoratemarginal))
PCBSPop<-cbind(makemaintableDF(PCBSrakedesign)[,1:3],data.frame(Raking=rep("5. PCBS population, full rake",14)))
names(PCBSPop)<-c("Categories","PCBSE","PCBS_SE","Raking")

# bind together
senstableDF<-cbind(usedT[1:3],eqd[,2:3],noGaza9[,2:3],noTrim[,2:3],NoHH[2:3],HH2017[2:3],PCBSPop[,2:3])   # Omit raking
senstableDF$type<-c(rep("Violent deaths",5),"Nonviolent deaths %",rep("Nonviolent deaths",2),rep("Missing persons",5),"Missing persons %")
senstableDF

##                           Categories       UsedE      UsedSE         EQE
## 1                     Children (<18) 22771.00000 3087.000000 21137.00000
## 2                      Women (18-64) 16576.00000 2222.000000 21353.00000
## 3                      Elderly (65+)  2868.00000  943.000000  1941.00000
## 4                        Men (18-64) 32949.00000 2703.000000 40764.00000
## 11              Total violent deaths 75164.00000 5919.000000 85195.00000
## WEC     Women, elderly, and children    56.16376    2.940178    52.15190
## 21           Total nonviolent deaths 16253.00000 2039.000000 14235.00000
## 12          Excess nonviolent deaths  8536.00000 2039.000000  6518.00000
## 13                    Children (<18)  3610.00000 1040.000000  3020.00000
## 22                     Women (18-64)   131.00000  134.000000   216.00000
## ...3                   Elderly (65+)     0.00000    0.000000     0.00000
## 41                       Men (18-64)  8468.00000 1649.000000 10137.00000
## 14             Total missing persons 12209.00000 2198.000000 13372.00000
## WECmiss Women, elderly, and children    30.63832    6.395562    24.19355
##                EQSE        noGE       noGSE     noTrimE    noTrimSE       NOHHE
## 1       2850.000000 19165.00000 2809.000000 22795.00000 3080.000000 25807.00000
## 2       2812.000000 13170.00000 2093.000000 16139.00000 2171.000000 17664.00000
## 3        637.000000  3061.00000 1106.000000  3349.00000 1167.000000  2802.00000
## 4       3328.000000 28736.00000 2637.000000 32085.00000 2641.000000 33650.00000
## 11      6710.000000 64132.00000 5410.000000 74367.00000 5855.000000 79924.00000
## WEC        2.886403    55.19215    3.385384    56.85650    2.940619    57.89686
## 21      1845.000000 17507.00000 2226.000000 17315.00000 2227.000000 16233.00000
## 12      1845.000000  9790.00000 2226.000000  9598.00000 2227.000000  8516.00000
## 13       828.000000  3297.00000 1008.000000  3538.00000 1020.000000  3959.00000
## 22       216.000000   136.00000  139.000000   126.00000  129.000000   194.00000
## ...3       0.000000     0.00000    0.000000     0.00000    0.000000     0.00000
## 41      1981.000000  8362.00000 1712.000000  8232.00000 1604.000000  8716.00000
## 14      2431.000000 11796.00000 2272.000000 11896.00000 2140.000000 12869.00000
## WECmiss    5.166943    29.10800    6.358807    30.79429    6.429389    32.27426
##             NOHH_SE    HH2017_E   HH2017_SE       PCBSE     PCBS_SE
## 1       3595.000000 22367.00000 3045.000000 24138.00000 3261.000000
## 2       2297.000000 15992.00000 2141.000000 17090.00000 2298.000000
## 3        909.000000  3440.00000 1200.000000  3546.00000 1236.000000
## 4       2742.000000 32040.00000 2641.000000 33975.00000 2797.000000
## 11      6640.000000 73838.00000 5776.000000 78749.00000 6200.000000
## WEC        2.875111    56.60792    2.940507    56.85650    2.940619
## 21      2021.000000 17225.00000 2244.000000 18335.00000 2358.000000
## 12      2021.000000  9508.00000 2244.000000 10618.00000 2358.000000
## 13      1123.000000  3510.00000 1019.000000  3746.00000 1080.000000
## 22       196.000000   125.00000  129.000000   133.00000  137.000000
## ...3       0.000000     0.00000    0.000000     0.00000    0.000000
## 41      1793.000000  8103.00000 1563.000000  8717.00000 1698.000000
## 14      2521.000000 11737.00000 2087.000000 12596.00000 2266.000000
## WECmiss    6.065367    30.96632    6.552877    30.79429    6.429389
##                        type
## 1            Violent deaths
## 2            Violent deaths
## 3            Violent deaths
## 4            Violent deaths
## 11           Violent deaths
## WEC     Nonviolent deaths %
## 21        Nonviolent deaths
## 12        Nonviolent deaths
## 13          Missing persons
## 22          Missing persons
## ...3        Missing persons
## 41          Missing persons
## 14          Missing persons
## WECmiss   Missing persons %

senstab<-senstableDF %>% select(Categories,UsedE, UsedSE,noGE,noGSE,HH2017_E,HH2017_SE,noTrimE,noTrimSE,PCBSE,PCBS_SE,NOHHE,NOHH_SE,EQE,EQSE,type) %>%
  group_by(type) %>%
  gt(row_group_as_column = TRUE) %>%
  opt_table_font(size=9) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_header(
    title = "Sensitivy of estimates to model assumptions") %>%
  tab_source_note(source_note = paste("Based on ",maindata))  %>% 
  tab_spanner(
    label = md("<br>Main model"),
    columns = c(UsedE,UsedSE)
  ) %>% 
  tab_spanner(
    label = md("Unadjusted<br>equal probability"),
    columns = c(EQE,EQSE)) %>% 
          tab_spanner(
            label = md("<br>Team 9 excluded"),
            columns = c(noGE,noGSE)) %>%
  tab_spanner(
    label = md("Full rake,<br>no weight trim"),
    columns = c(noTrimE,noTrimSE)) %>%
  tab_spanner(
    label = md("Census 2017<br>household size adjustment"),
    columns = c(HH2017_E,HH2017_SE)) %>%
  tab_spanner(
    label = md("No houshold<br>size adjustment"),
    columns = c(NOHHE,NOHH_SE)) %>%
  tab_spanner(
    label = md("PCBS population<br>Full rake"),
    columns = c(PCBSE,PCBS_SE)) %>%
  cols_label(UsedE="Estimate",
             UsedSE~"SE",
             EQE="Estimate",
             EQSE~"SE",
             noGE="Estimate",
             noGSE~"SE",
             noTrimE="Estimate",
             noTrimSE~"SE",
             NOHHE="Estimate",
             NOHH_SE~"SE",
             PCBSE="Estimate",
             PCBS_SE="SE",
             HH2017_E="Estimate",HH2017_SE="SE",
             .fn = md) %>%
  tab_footnote(
    locations = cells_column_labels(c(UsedSE)),
    footnote = "SE: Standard error")

senstab

	Categories	Main model		Team 9 excluded		Census 2017 household size adjustment		Full rake, no weight trim		PCBS population Full rake		No houshold size adjustment		Unadjusted equal probability
Sensitivy of estimates to model assumptions
	Categories	Estimate	SE¹	Estimate	SE	Estimate	SE	Estimate	SE	Estimate	SE	Estimate	SE	Estimate	SE
Violent deaths	Children (<18)	22,800	3,090	19,200	2,810	22,400	3,040	22,800	3,080	24,100	3,260	25,800	3,600	21,100	2,850
	Women (18-64)	16,600	2,220	13,200	2,090	16,000	2,140	16,100	2,170	17,100	2,300	17,700	2,300	21,400	2,810
	Elderly (65+)	2,870	943	3,060	1,110	3,440	1,200	3,350	1,170	3,550	1,240	2,800	909	1,940	637
	Men (18-64)	32,900	2,700	28,700	2,640	32,000	2,640	32,100	2,640	34,000	2,800	33,600	2,740	40,800	3,330
	Total violent deaths	75,200	5,920	64,100	5,410	73,800	5,780	74,400	5,860	78,700	6,200	79,900	6,640	85,200	6,710
Nonviolent deaths %	Women, elderly, and children	56.2	2.94	55.2	3.39	56.6	2.94	56.9	2.94	56.9	2.94	57.9	2.88	52.2	2.89
Nonviolent deaths	Total nonviolent deaths	16,300	2,040	17,500	2,230	17,200	2,240	17,300	2,230	18,300	2,360	16,200	2,020	14,200	1,840
Nonviolent deaths	Excess nonviolent deaths	8,540	2,040	9,790	2,230	9,510	2,240	9,600	2,230	10,600	2,360	8,520	2,020	6,520	1,840
Missing persons	Children (<18)	3,610	1,040	3,300	1,010	3,510	1,020	3,540	1,020	3,750	1,080	3,960	1,120	3,020	828
	Women (18-64)	131	134	136	139	125	129	126	129	133	137	194	196	216	216
	Elderly (65+)	0	0	0	0	0	0	0	0	0	0	0	0	0	0
	Men (18-64)	8,470	1,650	8,360	1,710	8,100	1,560	8,230	1,600	8,720	1,700	8,720	1,790	10,100	1,980
	Total missing persons	12,200	2,200	11,800	2,270	11,700	2,090	11,900	2,140	12,600	2,270	12,900	2,520	13,400	2,430
Missing persons %	Women, elderly, and children	30.6	6.40	29.1	6.36	31.0	6.55	30.8	6.43	30.8	6.43	32.3	6.07	24.2	5.17
Based on GMS Household roster.sav
¹ SE: Standard error

saveTable(senstab)

Plot errorbars

# First, make a dataset that works. Instead of using melt, cast and reshape esoterica, simply rbind the constituent sets.
names(usedT)[2:3]<-names(eqd)[2:3]<-names(noGaza9)[2:3]<-names(noTrim)[2:3]<-names(PCBSPop)[2:3]<-names(NoHH)[2:3]<-names(HH2017)[2:3]<-c("Estimate", "SE")
usedT$Type<-eqd$Type<-noGaza9$Type<-noTrim$Type<-PCBSPop$Type<-NoHH$Type<-HH2017$Type<-c(rep("Violent deaths",6),rep("Nonviolent deaths",2),rep("Missing persons",6))
longsensetab<-rbind(usedT,noGaza9,HH2017,noTrim,PCBSPop,NoHH,eqd)
dodge<-position_dodge(width=-0.9)   # creates warning in ggplot, but orders legend
gplotdata<-longsensetab %>%filter(!grepl("issing",Categories)) %>%
                         filter(!grepl(", and",Categories))
sensitivplot<-ggplot(gplotdata,aes(x=paste(Type,Categories,sep=" "),y=Estimate,group=Raking,color=Raking ))+
  geom_pointrange(aes(ymin=Estimate-1.96*SE,ymax=Estimate+1.96*SE),position=dodge)+
  xlab("")+ylab("Persons (point estimate and lower and upper confidence bounds)")+
  coord_flip()+
  theme_light()+ theme(legend.position="bottom")+guides(color=guide_legend(ncol=3))
  
sensitivplot

## Warning: `position_dodge()` requires non-overlapping x intervals.

pdf("Sensitivityplot.pdf", width=7.5,height=7,paper="letter")
print(sensitivplot)

## Warning: `position_dodge()` requires non-overlapping x intervals.

dev.off()

## png 
##   2

End of code

The Gaza Strip Mortality Survey

Jon Pedersen/Mike Spagat/Gaza Strip mortality survey project group

2025-04-22