Gaza-Mortality-Survey / Combined various calculations.R
Combined various calculations.R
Raw
library(tidyverse)
library(haven)
library(gt)

fates <- 
  read_sav("households.sav")
respondents <-  
  read_sav("respondents.sav")
births <- 
  read_sav("births.sav")

#Fates - 8,740 still present in the household, 281 left Gaza, 137 moved
#within Gaza, 457 dead, 62 missing, 52 imprisoned

nrow(filter(fates, HR05 == 1))
nrow(filter(fates, HR05 == 2))
nrow(filter(fates, HR05 == 3))
nrow(filter(fates, HR05 == 4))
nrow(filter(fates, HR05 == 5))
nrow(filter(fates, HR05 == 6))

#Types of deaths - 14 nonviolent with medical aid, 45 nonviolent 
#without medical aid, 5 accidental, 393 violent

nrow(filter(fates, HR06 == 1))
nrow(filter(fates, HR06 == 2))
nrow(filter(fates, HR06 == 3))
nrow(filter(fates, HR06 == 4))
nrow(filter(fates, HR06 == 5))

#Sample demographics (male == 1) - Note that small children with ages
#less than 1 were coded as having ages of 1

nrow(filter(fates, HR04 == 1, HR03 < 6))
nrow(filter(fates, HR04 == 2, HR03 < 6))
nrow(filter(fates, HR04 == 1, HR03 > 5, HR03 < 15))
nrow(filter(fates, HR04 == 2, HR03 > 5, HR03 < 15))
nrow(filter(fates, HR04 == 1, HR03 > 14, HR03 < 18))
nrow(filter(fates, HR04 == 2, HR03 > 14, HR03 < 18))
nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 40))
nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 40))
nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65))
nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65))
nrow(filter(fates, HR04 == 1, HR03 > 64))
nrow(filter(fates, HR04 == 2, HR03 > 64))

#Make a table of fates broken down by demographics

fates_dem <- c()
for (i in c(1:6)) {
  x1 <- nrow(filter(fates, HR04 == 1, HR03 < 5, HR05 == i))
  x2 <- nrow(filter(fates, HR04 == 2, HR03 < 5, HR05 == i))
  x3 <-nrow(filter(fates, HR04 == 1, HR03 > 4, HR03 < 12, HR05 == i))
  x4 <- nrow(filter(fates, HR04 == 2, HR03 > 4, HR03 < 12, HR05 == i))
  x5 <- nrow(filter(fates, HR04 == 1, HR03 > 11, HR03 < 18, HR05 == i))
  x6 <- nrow(filter(fates, HR04 == 2, HR03 > 11, HR03 < 18, HR05 == i))
  x7 <- nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 30, HR05 == i))
  x8 <- nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 30, HR05 == i))
  x9 <- nrow(filter(fates, HR04 == 1, HR03 > 29, HR03 < 40, HR05 == i))
  x10 <- nrow(filter(fates, HR04 == 2, HR03 > 29, HR03 < 40, HR05 == i))
  x11 <- nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65, HR05 == i))
  x12 <- nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65, HR05 == i))
  x13 <-nrow(filter(fates, HR04 == 1, HR03 > 64, HR05 == i))
  x14 <- nrow(filter(fates, HR04 == 2, HR03 > 64, HR05 == i))
  fates_dem <- c(fates_dem, x1, x2, x3, x4, x5, x6, x7, x8,
                 x9, x10, x11, x12, x13, x14)
}

matrix_fates <- matrix(fates_dem, nrow = 14, ncol = 6)

df_fates <- as.data.frame(matrix_fates)
demo_categories <- c("Male 0-4", "Female 0-4", "Male 5-11", 
                     "Female 5-11", "Male 12-17", "Female 12-17",
                     "Male 18-29", "Female 18-29", "Male 30-39", 
                     "Female 30-39", "Male 40-64",
                     "Female 40-64", "Male 65+", "Female 65+")
df_fates <- mutate(df_fates, demo_categories)
colnames(df_fates) <- c("Present", "Left Gaza", "Moved Out of Household",
                        "Dead", "Missing", "In Prison", "Gender/Age")

df_fates <- relocate(df_fates, "Gender/Age")

df_fates <- df_fates %>%
  bind_rows(summarise(., across(where(is.numeric), sum),
                      across(where(is.character), ~'Total')))
Total <- c()
for (i in c(1:15)) {
  Total <- c(Total, sum(df_fates[i,2:7]))
}



df_fates <- mutate(df_fates, Total)

fates_table <- gt(df_fates)%>%
  #  tab_header(
  #    title = "Table 1: Outcomes for all People in the Sample",
  #    subtitle = "Classified by Gender and Age"
  #  ) %>%
  tab_source_note(source_note = "Unweighted GMS Data")

gtsave(fates_table, "table1.png")

#Now let's do a similar table for deaths

deaths_dem <- c()
for (i in c(1:4)) {
  x1 <- nrow(filter(fates, HR04 == 1, HR03 < 5, HR06 == i))
  x2 <- nrow(filter(fates, HR04 == 2, HR03 < 5, HR06 == i))
  x3 <-nrow(filter(fates, HR04 == 1, HR03 > 4, HR03 < 12, HR06 == i))
  x4 <- nrow(filter(fates, HR04 == 2, HR03 > 4, HR03 < 12, HR06 == i))
  x5 <- nrow(filter(fates, HR04 == 1, HR03 > 11, HR03 < 18, HR06 == i))
  x6 <- nrow(filter(fates, HR04 == 2, HR03 > 11, HR03 < 18, HR06 == i))
  x7 <- nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 30, HR06 == i))
  x8 <- nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 30, HR06 == i))
  x9 <- nrow(filter(fates, HR04 == 1, HR03 > 29, HR03 < 40, HR06 == i))
  x10 <- nrow(filter(fates, HR04 == 2, HR03 > 29, HR03 < 40, HR06 == i))
  x11 <- nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65, HR06 == i))
  x12 <- nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65, HR06 == i))
  x13 <-nrow(filter(fates, HR04 == 1, HR03 > 64, HR06 == i))
  x14 <- nrow(filter(fates, HR04 == 2, HR03 > 64, HR06 == i))
  deaths_dem <- c(deaths_dem, x1, x2, x3, x4, x5, x6, x7, x8,
                  x9, x10, x11, x12, x13, x14)
}

matrix_deaths <- matrix(deaths_dem, nrow = 14, ncol = 4)

df_deaths <- as.data.frame(matrix_deaths)
df_deaths <- mutate(df_deaths, df_deaths$V1 + df_deaths$V2)
df_deaths <- df_deaths[,-(1:2)]  
df_deaths <- relocate(df_deaths, "df_deaths$V1 + df_deaths$V2")
demo_categories <- c("Male 0-4", "Female 0-4", "Male 5-11", 
                     "Female 5-11", "Male 12-17", "Female 12-17",
                     "Male 18-29", "Female 18-29", "Male 30-39", 
                     "Female 30-39", "Male 40-64",
                     "Female 40-64", "Male 65+", "Female 65+")
df_deaths <- mutate(df_deaths, demo_categories)
colnames(df_deaths) <- c("Nonviolent", 
                         "Accident", "Violent", "Gender/Age")

df_deaths <- relocate(df_deaths, "Gender/Age")

df_deaths <- df_deaths %>%
  bind_rows(summarise(., across(where(is.numeric), sum),
                      across(where(is.character), ~'Total')))
Total <- c()
for (i in c(1:15)) {
  Total <- c(Total, sum(df_deaths[i,2:4]))
}



df_deaths <- mutate(df_deaths, Total)

deaths_table <- gt(df_deaths)%>%
  #  tab_header(
  #    title = "Table 2: All Deaths in the Sample",
  #    subtitle = "Classified by Gender and Age"
  #  )%>%
  tab_source_note(source_note = "Unweighted GMS Data")

gtsave(deaths_table, "table2.png")

#Here is a breakdown of violent deaths classified by governorate of origin

table(fates$d04)
table(respondents$d04)

north <- round(nrow(filter(fates, HR06 == 4, d04 == 1)), digits = 0)
city <- nrow(filter(fates, HR06 == 4, d04 == 2))
khan <- nrow(filter(fates, HR06 == 4, d04 == 3))
central <- nrow(filter(fates, HR06 == 4, d04 == 4))
rafah <- nrow(filter(fates, HR06 == 4, d04 == 5))

north+city+central+khan+rafah

north_pop <- round(nrow(filter(fates, d04 == 1)), digits = 0)
city_pop <- nrow(filter(fates, d04 == 2))
khan_pop <- nrow(filter(fates, d04 == 3))
central_pop <- nrow(filter(fates, d04 == 4))
rafah_pop <- nrow(filter(fates, d04 == 5))

north_rate <- round(100*north/north_pop, digits = 1)
city_rate <- round(100*city/city_pop, digits = 1)
central_rate <- round(100*central/central_pop, digits = 1)
khan_rate <- round(100*khan/khan_pop, digits = 1)
rafah_rate <- round(100*rafah/rafah_pop, digits = 1)

goverornate_data <- c(north, city, central, khan, rafah,
                      north_rate, city_rate, central_rate,
                      khan_rate, rafah_rate)

matrix_governorate <- matrix(goverornate_data, nrow = 5, ncol = 2)

df_governorate <- as.data.frame(matrix_governorate)

Governorates <- c("Northern Gaza", "Gaza City",
                  "Deir al-Balah", "Khan Younis",
                  "Rafah")


df_governorate <- mutate(df_governorate, Governorates)

colnames(df_governorate) <-  c("Violent Deaths", 
                               "Violent Deaths as Percent of Sample",
                               "Governorates")

df_governorate <- relocate(df_governorate, "Governorates")


governorates_table <- gt(df_governorate)%>%
  #  tab_header(
  #    title = "Table 3: All Violent Deaths in the Sample",
  #    subtitle = "Classified by Governorate of Origin"
  #  )%>%
  tab_source_note(source_note = "Unweighted GMS Data")

gtsave(governorates_table, "table3.png")


#Now pull some basic information on births

nrow(births)

#179 boys and 178 girls

nrow(filter(births, BR05 == 1))
nrow(filter(births, BR05 == 2))

boys <- filter(births, BR05 == 1)
girls <- filter(births, BR05 == 2)

#In 2024 144 boys and 151 girls

nrow(filter(births, BR05 == 1, BR04 > "2023-12-31", BR04 < "2025-01-01"))
nrow(filter(births, BR05 == 2, BR04 > "2023-12-31", BR04 < "2025-01-01"))

#Starting 9 months after the war 63 boys and 64 girls

nrow(filter(births, BR05 == 1, BR04 > "2024-08-06"))
nrow(filter(births, BR05 == 2, BR04 > "2024-08-06"))

#There's a decrease in the birth rate starting 9 months after the start 
#of the war - from 25.6 to 21.2 per month (in the sample)

nrow(filter(births, BR04 > "2023-10-06", BR04 < "2024-08-07"))/9
nrow(filter(births, BR04 > "2024-08-06"))/6

#4 infant deaths

dead <- filter(births, BR04 > "2023-10-06", BR07 == 4)
nrow(filter(births, BR04 > "2023-10-06", BR07 != 4))


#2 killed violently and 1 in an accident

#Calculate person-days for infants
newborns <- filter(births, BR04 > "2023-10-06")

days <- c()
for (i in c(1:nrow(newborns))) {
  days <- c(days,length(seq(from = as.Date("2023-10-07"), 
                            to = as.Date(newborns$BR04[i]),
                            by = 'day')))
}

#180.1 equivalent newborns exposed for the whole war

equivalent_persons <- round(sum(days)/(length(seq(from = as.Date("2023-10-07"),
                                                  to = as.Date("2025-01-06"), by = 'day'))), digits = 1)

infant_data <- c(nrow(boys), nrow(girls), equivalent_persons, nrow(dead))

matrix_infant <- matrix(infant_data, nrow = 1)

df_infant <- as.data.frame(matrix_infant)

colnames(df_infant) <- c("Boys", "Girls", 
                         "Equivalent Persons","Dead")


infant_table <- gt(df_infant)%>%
  tab_header(
    title = "Born After October 6, 2023"
  ) 

gtsave(infant_table, "infant_table.png")

#An unweighted estimate for the annual birth rate during the war is 62,832

(nrow(boys)+nrow(girls))*220/1.25

library(tidyverse)
march <- read.csv("MOH_List_3_23_25.csv")
october <-read.csv("MoH_List_10_7_24.csv")

children <- nrow(filter(march, Age < 18))
women <- nrow(filter(march, Age > 17, Age < 65, Sex == "Female"))
elderly <- nrow(filter(march, Age > 64))

(children + women + elderly)/nrow(march)

children <- nrow(filter(october, Age < 18))
women <- nrow(filter(october, Age > 17, Age < 65, Sex == "Female"))
elderly <- nrow(filter(october, Age > 64))

(children + women + elderly)/nrow(october)

#calculate the percentage of our lower and upper violent-death estimates
#captured by the MoH

lower <- 45650/63600
upper <- 45650/86800
central <- 45650/75200

1 - central
1- lower
1- upper

#Calculate percentage of population killed
#According to the US census there were 1,064,348 males and 1,034,041 females
#in Gaza at the beginning of the war

population <- 1064348 + 1034041

violent_lower <- 63600
violent_upper <- 86800
violent_central <- 75200

100*violent_lower/population
100*violent_upper/population
100*violent_central/population



#The rest of this code is aimed evaluating the quality of our raking. In 
#particular, we create a table that compares population distributions with
#weighted (after raking) sample distributions.

library(tidyverse)
library(gt)
jon <- readRDS("GMS_at_risk_apr_18_2025.rds")

jon |> 
  zap_label() |> 
  zap_labels()

##              Males Females AgeGr
## 1         1  182000  174000   0-4
## 2         2  200191  188991  5-11
## 3         3  165430  157849 12-17
## 4         4  248133  240116 18-29
## 5         5  130574  133015 30-39
## 6         6  146967  149998 40-64
## 7         7   29944   28670   65+
## 8        NA 1064348 1034041 Total

pop_dem <- c(182000, 174000, 200191, 188991,
             165430, 157849, 248133, 240116,
             130574, 133015, 146967, 149998,
             29944, 28670)

pop_total <- 1064348 + 1034041

pop_dem_percentages <- 100*pop_dem/pop_total
sum(pop_dem_percentages)


sample_dem <- c(nrow(filter(jon, HR04 == 1, HR03 < 5)),
                nrow(filter(jon, HR04 == 2, HR03 < 5)),
                nrow(filter(jon, HR04 == 1, HR03 > 4, HR03 < 12)),
                nrow(filter(jon, HR04 == 2, HR03 > 4, HR03 < 12)),
                nrow(filter(jon, HR04 == 1, HR03 > 11, HR03 < 18)),
                nrow(filter(jon, HR04 == 2, HR03 > 11, HR03 < 18)),
                nrow(filter(jon, HR04 == 1, HR03 > 17, HR03 < 30)),
                nrow(filter(jon, HR04 == 2, HR03 > 17, HR03 < 30)),
                nrow(filter(jon, HR04 == 1, HR03 > 29, HR03 < 40)),
                nrow(filter(jon, HR04 == 2, HR03 > 29, HR03 < 40)),
                nrow(filter(jon, HR04 == 1, HR03 > 39, HR03 < 65)),
                nrow(filter(jon, HR04 == 2, HR03 > 39, HR03 < 65)),
                nrow(filter(jon, HR04 == 1, HR03 > 64)),
                nrow(filter(jon, HR04 == 2, HR03 > 64))
)

sample_total <- nrow(jon)

sample_dem_percentages <- 100*sample_dem/sample_total

sum(jon$weights)

sample_dem_weighted <- c(sum(filter(jon, HR04 == 1, HR03 < 5)$weights),
                         sum(filter(jon, HR04 == 2, HR03 < 5)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 4, HR03 < 12)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 4, HR03 < 12)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 11, HR03 < 18)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 11, HR03 < 18)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 17, HR03 < 30)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 17, HR03 < 30)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 29, HR03 < 40)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 29, HR03 < 40)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 39, HR03 < 65)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 39, HR03 < 65)$weights),
                         sum(filter(jon, HR04 == 1, HR03 > 64)$weights),
                         sum(filter(jon, HR04 == 2, HR03 > 64)$weights)
)

sample_weighted_total <- sum(sample_dem_weighted)

sample_dem_weighted_percentages <- 100*sample_dem_weighted/
  sample_weighted_total

sample_dem_weighted_percentages-pop_dem_percentages


#Here are governorate populations for, in order, North Gaza, Gaza City
#Deir al Balah, Khan Younis and Rafah

pop_gov <- c(418833, 705983, 300835, 413315, 259423)

sum(pop_gov)

pop_gov_percentages <- 100*pop_gov/sum(pop_gov)

sample_gov <- c(nrow(filter (jon, HR00 == 1)),
                nrow(filter (jon, HR00 == 2)),
                nrow(filter (jon, HR00 == 3)),
                nrow(filter (jon, HR00 == 4)),
                nrow(filter (jon, HR00 == 5)))

sample_gov_percentages <- 100*sample_gov/nrow(jon)

sample_gov_weighted <- c(sum(filter (jon, HR00 == 1)$weights),
                         sum(filter (jon, HR00 == 2)$weights),
                         sum(filter (jon, HR00 == 3)$weights),
                         sum(filter (jon, HR00 == 4)$weights),
                         sum(filter (jon, HR00 == 5)$weights))

sample_gov_weighted_percentages <- 100*sample_gov_weighted/
  sum(sample_gov_weighted)

sample_gov_weighted_percentages- pop_gov_percentages

#Jon used mics household sizes in the raking so the census information 
#is hear just for a possible future comparison


census_hh <- c(11920.57, 70385.14, 114332.80, 194472.65, 275651.26,
               337047.61, 334535.13, 274554.36, 191615.78, 293873.71)

census_household_percentages <- 100*census_hh/sum(census_hh)

mics_hh <- c(110, 501, 1069, 1841, 2458,
             3218, 3289, 2255, 1816, 2502)

mics_household_percentages <- 100*mics_hh/sum(mics_hh)

sum(mics_household_percentages)


households_df <- jon %>%
  group_by(prikey)%>%
  summarize(weights = sum(weights),
            sizes = mean(hsize10))


sample_hh_1 <- nrow(filter(households_df, sizes == 1))
sample_hh_2 <- nrow(filter(households_df, sizes == 2))
sample_hh_3 <- nrow(filter(households_df, sizes == 3))
sample_hh_4 <- nrow(filter(households_df, sizes == 4))
sample_hh_5 <- nrow(filter(households_df, sizes == 5))
sample_hh_6 <- nrow(filter(households_df, sizes == 6))
sample_hh_7 <- nrow(filter(households_df, sizes == 7))
sample_hh_8 <- nrow(filter(households_df, sizes == 8))
sample_hh_9 <- nrow(filter(households_df, sizes == 9))
sample_hh_10 <- nrow(filter(households_df, sizes == 10))

sample_hh_1_percentage <- 100*sample_hh_1/nrow(households_df)
sample_hh_2_percentage <- 100*sample_hh_2/nrow(households_df)
sample_hh_3_percentage <- 100*sample_hh_3/nrow(households_df)
sample_hh_4_percentage <- 100*sample_hh_4/nrow(households_df)
sample_hh_5_percentage <- 100*sample_hh_5/nrow(households_df)
sample_hh_6_percentage <- 100*sample_hh_6/nrow(households_df)
sample_hh_7_percentage <- 100*sample_hh_7/nrow(households_df)
sample_hh_8_percentage <- 100*sample_hh_8/nrow(households_df)
sample_hh_9_percentage <- 100*sample_hh_9/nrow(households_df)
sample_hh_10_percentage <- 100*sample_hh_10/nrow(households_df)

sample_household_percentages <- c(sample_hh_1_percentage,
                                  sample_hh_2_percentage,
                                  sample_hh_3_percentage,
                                  sample_hh_4_percentage,
                                  sample_hh_5_percentage,
                                  sample_hh_6_percentage,
                                  sample_hh_7_percentage,
                                  sample_hh_8_percentage,
                                  sample_hh_9_percentage,
                                  sample_hh_10_percentage)

sum(sample_household_percentages)


sample_hh_1_weighted <- sum(filter(households_df, sizes == 1)$weights)
sample_hh_2_weighted <- sum(filter(households_df, sizes == 2)$weights)
sample_hh_3_weighted <- sum(filter(households_df, sizes == 3)$weights)
sample_hh_4_weighted <- sum(filter(households_df, sizes == 4)$weights)
sample_hh_5_weighted <- sum(filter(households_df, sizes == 5)$weights)
sample_hh_6_weighted <- sum(filter(households_df, sizes == 6)$weights)
sample_hh_7_weighted <- sum(filter(households_df, sizes == 7)$weights)
sample_hh_8_weighted <- sum(filter(households_df, sizes == 8)$weights)
sample_hh_9_weighted <- sum(filter(households_df, sizes == 9)$weights)
sample_hh_10_weighted <- sum(filter(households_df, sizes == 10)$weights)

sample_hh_1_weighted_percentage <- 100*sample_hh_1_weighted/sum(households_df$weights)
sample_hh_2_weighted_percentage <- 100*sample_hh_2_weighted/sum(households_df$weights)
sample_hh_3_weighted_percentage <- 100*sample_hh_3_weighted/sum(households_df$weights)
sample_hh_4_weighted_percentage <- 100*sample_hh_4_weighted/sum(households_df$weights)
sample_hh_5_weighted_percentage <- 100*sample_hh_5_weighted/sum(households_df$weights)
sample_hh_6_weighted_percentage <- 100*sample_hh_6_weighted/sum(households_df$weights)
sample_hh_7_weighted_percentage <- 100*sample_hh_7_weighted/sum(households_df$weights)
sample_hh_8_weighted_percentage <- 100*sample_hh_8_weighted/sum(households_df$weights)
sample_hh_9_weighted_percentage <- 100*sample_hh_9_weighted/sum(households_df$weights)
sample_hh_10_weighted_percentage <- 100*sample_hh_10_weighted/sum(households_df$weights)

sample_household_weighted_percentages <- c(sample_hh_1_weighted_percentage,
                                           sample_hh_2_weighted_percentage,
                                           sample_hh_3_weighted_percentage,
                                           sample_hh_4_weighted_percentage,
                                           sample_hh_5_weighted_percentage,
                                           sample_hh_6_weighted_percentage,
                                           sample_hh_7_weighted_percentage,
                                           sample_hh_8_weighted_percentage,
                                           sample_hh_9_weighted_percentage,
                                           sample_hh_10_weighted_percentage)

sum(sample_household_weighted_percentages)

census_household_percentages-sample_household_weighted_percentages

sum(mics_household_percentages-sample_household_weighted_percentages)

raking_data <- c(sample_dem_percentages, sample_gov_percentages, 
                 sample_household_percentages,
                 pop_dem_percentages, pop_gov_percentages, 
                 mics_household_percentages,
                 sample_dem_weighted_percentages, sample_gov_weighted_percentages,
                 sample_household_weighted_percentages)

raking_matrix <- matrix(raking_data, ncol = 3)

df_raking <- as.data.frame(raking_matrix)

df_raking <- mutate(df_raking, 
                    df_raking[,2] - df_raking[,3])

categories <- c("Male 0-4", "Female 0-4", "Male 5-14", 
                "Female 5-14", "Male 15-17", "Female 15-17",
                "Male 18-29", "Female 18-29", "Male 30-39", 
                "Female 30-39", "Male 40-64",
                "Female 40-64", "Male 65+", "Female 65+",
                "North Gaza", "Gaza City","Deir al Balah", 
                "Khan Younis", "Rafah",
                "1", "2", "3", "4", "5", "6", "7", "8", "9", "10+"
)

df_raking <- mutate(df_raking, categories)

colnames(df_raking) <- c("Sample Percentages", "Population Percentages", 
                         "Raked Sample Percentages", "Population - Raked Sample",
                         "Categories")

df_raking <- relocate(df_raking, "Categories")



raking_table <- gt(df_raking)%>%
  tab_header(
    title = "Table S7: Comparison of Population Values with Sample and Raked Sample Values",
    subtitle = "By Demographics, Governorate and Household Size"
  ) %>%
  fmt_number(n_sigfig = 3)%>%
  tab_row_group(label = "Household Sizes", 
                rows = c(20:29)) %>%
  tab_row_group(label = "Governorates", 
                rows = c(15:19))%>%
  tab_row_group(label = "Ages/Sexes", 
                rows = c(1:14))



gtsave(raking_table, "raking_table.png")

summary(df_raking$`Population - Raked Sample`)

#Raked percentages are very close to population percentages with the exception
#of for 10+ households which differ by about 2 percentage points so it is
#worth checking whether the 10+ households have substantially different
#death rates in the unweighted sample than the other households. The violent
#death rate for the 10+ households is 3.1% compared to 4.0 for the other 
#households so upweighting the 10+ households would decrease the violent
#death estimate but not by much since their are few 10+ households and
#a slightly upweight would not matter much.

100*nrow(filter(jon, hsize > 9 & Violent == 1))/
  nrow(filter(jon, hsize > 9))

100*nrow(filter(jon, hsize < 10 & Violent == 1))/
  nrow(filter(jon, hsize < 10))