library(tidyverse)
library(haven)
library(gt)
fates <-
read_sav("households.sav")
respondents <-
read_sav("respondents.sav")
births <-
read_sav("births.sav")
#Fates - 8,740 still present in the household, 281 left Gaza, 137 moved
#within Gaza, 457 dead, 62 missing, 52 imprisoned
nrow(filter(fates, HR05 == 1))
nrow(filter(fates, HR05 == 2))
nrow(filter(fates, HR05 == 3))
nrow(filter(fates, HR05 == 4))
nrow(filter(fates, HR05 == 5))
nrow(filter(fates, HR05 == 6))
#Types of deaths - 14 nonviolent with medical aid, 45 nonviolent
#without medical aid, 5 accidental, 393 violent
nrow(filter(fates, HR06 == 1))
nrow(filter(fates, HR06 == 2))
nrow(filter(fates, HR06 == 3))
nrow(filter(fates, HR06 == 4))
nrow(filter(fates, HR06 == 5))
#Sample demographics (male == 1) - Note that small children with ages
#less than 1 were coded as having ages of 1
nrow(filter(fates, HR04 == 1, HR03 < 6))
nrow(filter(fates, HR04 == 2, HR03 < 6))
nrow(filter(fates, HR04 == 1, HR03 > 5, HR03 < 15))
nrow(filter(fates, HR04 == 2, HR03 > 5, HR03 < 15))
nrow(filter(fates, HR04 == 1, HR03 > 14, HR03 < 18))
nrow(filter(fates, HR04 == 2, HR03 > 14, HR03 < 18))
nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 40))
nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 40))
nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65))
nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65))
nrow(filter(fates, HR04 == 1, HR03 > 64))
nrow(filter(fates, HR04 == 2, HR03 > 64))
#Make a table of fates broken down by demographics
fates_dem <- c()
for (i in c(1:6)) {
x1 <- nrow(filter(fates, HR04 == 1, HR03 < 5, HR05 == i))
x2 <- nrow(filter(fates, HR04 == 2, HR03 < 5, HR05 == i))
x3 <-nrow(filter(fates, HR04 == 1, HR03 > 4, HR03 < 12, HR05 == i))
x4 <- nrow(filter(fates, HR04 == 2, HR03 > 4, HR03 < 12, HR05 == i))
x5 <- nrow(filter(fates, HR04 == 1, HR03 > 11, HR03 < 18, HR05 == i))
x6 <- nrow(filter(fates, HR04 == 2, HR03 > 11, HR03 < 18, HR05 == i))
x7 <- nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 30, HR05 == i))
x8 <- nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 30, HR05 == i))
x9 <- nrow(filter(fates, HR04 == 1, HR03 > 29, HR03 < 40, HR05 == i))
x10 <- nrow(filter(fates, HR04 == 2, HR03 > 29, HR03 < 40, HR05 == i))
x11 <- nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65, HR05 == i))
x12 <- nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65, HR05 == i))
x13 <-nrow(filter(fates, HR04 == 1, HR03 > 64, HR05 == i))
x14 <- nrow(filter(fates, HR04 == 2, HR03 > 64, HR05 == i))
fates_dem <- c(fates_dem, x1, x2, x3, x4, x5, x6, x7, x8,
x9, x10, x11, x12, x13, x14)
}
matrix_fates <- matrix(fates_dem, nrow = 14, ncol = 6)
df_fates <- as.data.frame(matrix_fates)
demo_categories <- c("Male 0-4", "Female 0-4", "Male 5-11",
"Female 5-11", "Male 12-17", "Female 12-17",
"Male 18-29", "Female 18-29", "Male 30-39",
"Female 30-39", "Male 40-64",
"Female 40-64", "Male 65+", "Female 65+")
df_fates <- mutate(df_fates, demo_categories)
colnames(df_fates) <- c("Present", "Left Gaza", "Moved Out of Household",
"Dead", "Missing", "In Prison", "Gender/Age")
df_fates <- relocate(df_fates, "Gender/Age")
df_fates <- df_fates %>%
bind_rows(summarise(., across(where(is.numeric), sum),
across(where(is.character), ~'Total')))
Total <- c()
for (i in c(1:15)) {
Total <- c(Total, sum(df_fates[i,2:7]))
}
df_fates <- mutate(df_fates, Total)
fates_table <- gt(df_fates)%>%
# tab_header(
# title = "Table 1: Outcomes for all People in the Sample",
# subtitle = "Classified by Gender and Age"
# ) %>%
tab_source_note(source_note = "Unweighted GMS Data")
gtsave(fates_table, "table1.png")
#Now let's do a similar table for deaths
deaths_dem <- c()
for (i in c(1:4)) {
x1 <- nrow(filter(fates, HR04 == 1, HR03 < 5, HR06 == i))
x2 <- nrow(filter(fates, HR04 == 2, HR03 < 5, HR06 == i))
x3 <-nrow(filter(fates, HR04 == 1, HR03 > 4, HR03 < 12, HR06 == i))
x4 <- nrow(filter(fates, HR04 == 2, HR03 > 4, HR03 < 12, HR06 == i))
x5 <- nrow(filter(fates, HR04 == 1, HR03 > 11, HR03 < 18, HR06 == i))
x6 <- nrow(filter(fates, HR04 == 2, HR03 > 11, HR03 < 18, HR06 == i))
x7 <- nrow(filter(fates, HR04 == 1, HR03 > 17, HR03 < 30, HR06 == i))
x8 <- nrow(filter(fates, HR04 == 2, HR03 > 17, HR03 < 30, HR06 == i))
x9 <- nrow(filter(fates, HR04 == 1, HR03 > 29, HR03 < 40, HR06 == i))
x10 <- nrow(filter(fates, HR04 == 2, HR03 > 29, HR03 < 40, HR06 == i))
x11 <- nrow(filter(fates, HR04 == 1, HR03 > 39, HR03 < 65, HR06 == i))
x12 <- nrow(filter(fates, HR04 == 2, HR03 > 39, HR03 < 65, HR06 == i))
x13 <-nrow(filter(fates, HR04 == 1, HR03 > 64, HR06 == i))
x14 <- nrow(filter(fates, HR04 == 2, HR03 > 64, HR06 == i))
deaths_dem <- c(deaths_dem, x1, x2, x3, x4, x5, x6, x7, x8,
x9, x10, x11, x12, x13, x14)
}
matrix_deaths <- matrix(deaths_dem, nrow = 14, ncol = 4)
df_deaths <- as.data.frame(matrix_deaths)
df_deaths <- mutate(df_deaths, df_deaths$V1 + df_deaths$V2)
df_deaths <- df_deaths[,-(1:2)]
df_deaths <- relocate(df_deaths, "df_deaths$V1 + df_deaths$V2")
demo_categories <- c("Male 0-4", "Female 0-4", "Male 5-11",
"Female 5-11", "Male 12-17", "Female 12-17",
"Male 18-29", "Female 18-29", "Male 30-39",
"Female 30-39", "Male 40-64",
"Female 40-64", "Male 65+", "Female 65+")
df_deaths <- mutate(df_deaths, demo_categories)
colnames(df_deaths) <- c("Nonviolent",
"Accident", "Violent", "Gender/Age")
df_deaths <- relocate(df_deaths, "Gender/Age")
df_deaths <- df_deaths %>%
bind_rows(summarise(., across(where(is.numeric), sum),
across(where(is.character), ~'Total')))
Total <- c()
for (i in c(1:15)) {
Total <- c(Total, sum(df_deaths[i,2:4]))
}
df_deaths <- mutate(df_deaths, Total)
deaths_table <- gt(df_deaths)%>%
# tab_header(
# title = "Table 2: All Deaths in the Sample",
# subtitle = "Classified by Gender and Age"
# )%>%
tab_source_note(source_note = "Unweighted GMS Data")
gtsave(deaths_table, "table2.png")
#Here is a breakdown of violent deaths classified by governorate of origin
table(fates$d04)
table(respondents$d04)
north <- round(nrow(filter(fates, HR06 == 4, d04 == 1)), digits = 0)
city <- nrow(filter(fates, HR06 == 4, d04 == 2))
khan <- nrow(filter(fates, HR06 == 4, d04 == 3))
central <- nrow(filter(fates, HR06 == 4, d04 == 4))
rafah <- nrow(filter(fates, HR06 == 4, d04 == 5))
north+city+central+khan+rafah
north_pop <- round(nrow(filter(fates, d04 == 1)), digits = 0)
city_pop <- nrow(filter(fates, d04 == 2))
khan_pop <- nrow(filter(fates, d04 == 3))
central_pop <- nrow(filter(fates, d04 == 4))
rafah_pop <- nrow(filter(fates, d04 == 5))
north_rate <- round(100*north/north_pop, digits = 1)
city_rate <- round(100*city/city_pop, digits = 1)
central_rate <- round(100*central/central_pop, digits = 1)
khan_rate <- round(100*khan/khan_pop, digits = 1)
rafah_rate <- round(100*rafah/rafah_pop, digits = 1)
goverornate_data <- c(north, city, central, khan, rafah,
north_rate, city_rate, central_rate,
khan_rate, rafah_rate)
matrix_governorate <- matrix(goverornate_data, nrow = 5, ncol = 2)
df_governorate <- as.data.frame(matrix_governorate)
Governorates <- c("Northern Gaza", "Gaza City",
"Deir al-Balah", "Khan Younis",
"Rafah")
df_governorate <- mutate(df_governorate, Governorates)
colnames(df_governorate) <- c("Violent Deaths",
"Violent Deaths as Percent of Sample",
"Governorates")
df_governorate <- relocate(df_governorate, "Governorates")
governorates_table <- gt(df_governorate)%>%
# tab_header(
# title = "Table 3: All Violent Deaths in the Sample",
# subtitle = "Classified by Governorate of Origin"
# )%>%
tab_source_note(source_note = "Unweighted GMS Data")
gtsave(governorates_table, "table3.png")
#Now pull some basic information on births
nrow(births)
#179 boys and 178 girls
nrow(filter(births, BR05 == 1))
nrow(filter(births, BR05 == 2))
boys <- filter(births, BR05 == 1)
girls <- filter(births, BR05 == 2)
#In 2024 144 boys and 151 girls
nrow(filter(births, BR05 == 1, BR04 > "2023-12-31", BR04 < "2025-01-01"))
nrow(filter(births, BR05 == 2, BR04 > "2023-12-31", BR04 < "2025-01-01"))
#Starting 9 months after the war 63 boys and 64 girls
nrow(filter(births, BR05 == 1, BR04 > "2024-08-06"))
nrow(filter(births, BR05 == 2, BR04 > "2024-08-06"))
#There's a decrease in the birth rate starting 9 months after the start
#of the war - from 25.6 to 21.2 per month (in the sample)
nrow(filter(births, BR04 > "2023-10-06", BR04 < "2024-08-07"))/9
nrow(filter(births, BR04 > "2024-08-06"))/6
#4 infant deaths
dead <- filter(births, BR04 > "2023-10-06", BR07 == 4)
nrow(filter(births, BR04 > "2023-10-06", BR07 != 4))
#2 killed violently and 1 in an accident
#Calculate person-days for infants
newborns <- filter(births, BR04 > "2023-10-06")
days <- c()
for (i in c(1:nrow(newborns))) {
days <- c(days,length(seq(from = as.Date("2023-10-07"),
to = as.Date(newborns$BR04[i]),
by = 'day')))
}
#180.1 equivalent newborns exposed for the whole war
equivalent_persons <- round(sum(days)/(length(seq(from = as.Date("2023-10-07"),
to = as.Date("2025-01-06"), by = 'day'))), digits = 1)
infant_data <- c(nrow(boys), nrow(girls), equivalent_persons, nrow(dead))
matrix_infant <- matrix(infant_data, nrow = 1)
df_infant <- as.data.frame(matrix_infant)
colnames(df_infant) <- c("Boys", "Girls",
"Equivalent Persons","Dead")
infant_table <- gt(df_infant)%>%
tab_header(
title = "Born After October 6, 2023"
)
gtsave(infant_table, "infant_table.png")
#An unweighted estimate for the annual birth rate during the war is 62,832
(nrow(boys)+nrow(girls))*220/1.25
library(tidyverse)
march <- read.csv("MOH_List_3_23_25.csv")
october <-read.csv("MoH_List_10_7_24.csv")
children <- nrow(filter(march, Age < 18))
women <- nrow(filter(march, Age > 17, Age < 65, Sex == "Female"))
elderly <- nrow(filter(march, Age > 64))
(children + women + elderly)/nrow(march)
children <- nrow(filter(october, Age < 18))
women <- nrow(filter(october, Age > 17, Age < 65, Sex == "Female"))
elderly <- nrow(filter(october, Age > 64))
(children + women + elderly)/nrow(october)
#calculate the percentage of our lower and upper violent-death estimates
#captured by the MoH
lower <- 45650/63600
upper <- 45650/86800
central <- 45650/75200
1 - central
1- lower
1- upper
#Calculate percentage of population killed
#According to the US census there were 1,064,348 males and 1,034,041 females
#in Gaza at the beginning of the war
population <- 1064348 + 1034041
violent_lower <- 63600
violent_upper <- 86800
violent_central <- 75200
100*violent_lower/population
100*violent_upper/population
100*violent_central/population
#The rest of this code is aimed evaluating the quality of our raking. In
#particular, we create a table that compares population distributions with
#weighted (after raking) sample distributions.
library(tidyverse)
library(gt)
jon <- readRDS("GMS_at_risk_apr_18_2025.rds")
jon |>
zap_label() |>
zap_labels()
## Males Females AgeGr
## 1 1 182000 174000 0-4
## 2 2 200191 188991 5-11
## 3 3 165430 157849 12-17
## 4 4 248133 240116 18-29
## 5 5 130574 133015 30-39
## 6 6 146967 149998 40-64
## 7 7 29944 28670 65+
## 8 NA 1064348 1034041 Total
pop_dem <- c(182000, 174000, 200191, 188991,
165430, 157849, 248133, 240116,
130574, 133015, 146967, 149998,
29944, 28670)
pop_total <- 1064348 + 1034041
pop_dem_percentages <- 100*pop_dem/pop_total
sum(pop_dem_percentages)
sample_dem <- c(nrow(filter(jon, HR04 == 1, HR03 < 5)),
nrow(filter(jon, HR04 == 2, HR03 < 5)),
nrow(filter(jon, HR04 == 1, HR03 > 4, HR03 < 12)),
nrow(filter(jon, HR04 == 2, HR03 > 4, HR03 < 12)),
nrow(filter(jon, HR04 == 1, HR03 > 11, HR03 < 18)),
nrow(filter(jon, HR04 == 2, HR03 > 11, HR03 < 18)),
nrow(filter(jon, HR04 == 1, HR03 > 17, HR03 < 30)),
nrow(filter(jon, HR04 == 2, HR03 > 17, HR03 < 30)),
nrow(filter(jon, HR04 == 1, HR03 > 29, HR03 < 40)),
nrow(filter(jon, HR04 == 2, HR03 > 29, HR03 < 40)),
nrow(filter(jon, HR04 == 1, HR03 > 39, HR03 < 65)),
nrow(filter(jon, HR04 == 2, HR03 > 39, HR03 < 65)),
nrow(filter(jon, HR04 == 1, HR03 > 64)),
nrow(filter(jon, HR04 == 2, HR03 > 64))
)
sample_total <- nrow(jon)
sample_dem_percentages <- 100*sample_dem/sample_total
sum(jon$weights)
sample_dem_weighted <- c(sum(filter(jon, HR04 == 1, HR03 < 5)$weights),
sum(filter(jon, HR04 == 2, HR03 < 5)$weights),
sum(filter(jon, HR04 == 1, HR03 > 4, HR03 < 12)$weights),
sum(filter(jon, HR04 == 2, HR03 > 4, HR03 < 12)$weights),
sum(filter(jon, HR04 == 1, HR03 > 11, HR03 < 18)$weights),
sum(filter(jon, HR04 == 2, HR03 > 11, HR03 < 18)$weights),
sum(filter(jon, HR04 == 1, HR03 > 17, HR03 < 30)$weights),
sum(filter(jon, HR04 == 2, HR03 > 17, HR03 < 30)$weights),
sum(filter(jon, HR04 == 1, HR03 > 29, HR03 < 40)$weights),
sum(filter(jon, HR04 == 2, HR03 > 29, HR03 < 40)$weights),
sum(filter(jon, HR04 == 1, HR03 > 39, HR03 < 65)$weights),
sum(filter(jon, HR04 == 2, HR03 > 39, HR03 < 65)$weights),
sum(filter(jon, HR04 == 1, HR03 > 64)$weights),
sum(filter(jon, HR04 == 2, HR03 > 64)$weights)
)
sample_weighted_total <- sum(sample_dem_weighted)
sample_dem_weighted_percentages <- 100*sample_dem_weighted/
sample_weighted_total
sample_dem_weighted_percentages-pop_dem_percentages
#Here are governorate populations for, in order, North Gaza, Gaza City
#Deir al Balah, Khan Younis and Rafah
pop_gov <- c(418833, 705983, 300835, 413315, 259423)
sum(pop_gov)
pop_gov_percentages <- 100*pop_gov/sum(pop_gov)
sample_gov <- c(nrow(filter (jon, HR00 == 1)),
nrow(filter (jon, HR00 == 2)),
nrow(filter (jon, HR00 == 3)),
nrow(filter (jon, HR00 == 4)),
nrow(filter (jon, HR00 == 5)))
sample_gov_percentages <- 100*sample_gov/nrow(jon)
sample_gov_weighted <- c(sum(filter (jon, HR00 == 1)$weights),
sum(filter (jon, HR00 == 2)$weights),
sum(filter (jon, HR00 == 3)$weights),
sum(filter (jon, HR00 == 4)$weights),
sum(filter (jon, HR00 == 5)$weights))
sample_gov_weighted_percentages <- 100*sample_gov_weighted/
sum(sample_gov_weighted)
sample_gov_weighted_percentages- pop_gov_percentages
#Jon used mics household sizes in the raking so the census information
#is hear just for a possible future comparison
census_hh <- c(11920.57, 70385.14, 114332.80, 194472.65, 275651.26,
337047.61, 334535.13, 274554.36, 191615.78, 293873.71)
census_household_percentages <- 100*census_hh/sum(census_hh)
mics_hh <- c(110, 501, 1069, 1841, 2458,
3218, 3289, 2255, 1816, 2502)
mics_household_percentages <- 100*mics_hh/sum(mics_hh)
sum(mics_household_percentages)
households_df <- jon %>%
group_by(prikey)%>%
summarize(weights = sum(weights),
sizes = mean(hsize10))
sample_hh_1 <- nrow(filter(households_df, sizes == 1))
sample_hh_2 <- nrow(filter(households_df, sizes == 2))
sample_hh_3 <- nrow(filter(households_df, sizes == 3))
sample_hh_4 <- nrow(filter(households_df, sizes == 4))
sample_hh_5 <- nrow(filter(households_df, sizes == 5))
sample_hh_6 <- nrow(filter(households_df, sizes == 6))
sample_hh_7 <- nrow(filter(households_df, sizes == 7))
sample_hh_8 <- nrow(filter(households_df, sizes == 8))
sample_hh_9 <- nrow(filter(households_df, sizes == 9))
sample_hh_10 <- nrow(filter(households_df, sizes == 10))
sample_hh_1_percentage <- 100*sample_hh_1/nrow(households_df)
sample_hh_2_percentage <- 100*sample_hh_2/nrow(households_df)
sample_hh_3_percentage <- 100*sample_hh_3/nrow(households_df)
sample_hh_4_percentage <- 100*sample_hh_4/nrow(households_df)
sample_hh_5_percentage <- 100*sample_hh_5/nrow(households_df)
sample_hh_6_percentage <- 100*sample_hh_6/nrow(households_df)
sample_hh_7_percentage <- 100*sample_hh_7/nrow(households_df)
sample_hh_8_percentage <- 100*sample_hh_8/nrow(households_df)
sample_hh_9_percentage <- 100*sample_hh_9/nrow(households_df)
sample_hh_10_percentage <- 100*sample_hh_10/nrow(households_df)
sample_household_percentages <- c(sample_hh_1_percentage,
sample_hh_2_percentage,
sample_hh_3_percentage,
sample_hh_4_percentage,
sample_hh_5_percentage,
sample_hh_6_percentage,
sample_hh_7_percentage,
sample_hh_8_percentage,
sample_hh_9_percentage,
sample_hh_10_percentage)
sum(sample_household_percentages)
sample_hh_1_weighted <- sum(filter(households_df, sizes == 1)$weights)
sample_hh_2_weighted <- sum(filter(households_df, sizes == 2)$weights)
sample_hh_3_weighted <- sum(filter(households_df, sizes == 3)$weights)
sample_hh_4_weighted <- sum(filter(households_df, sizes == 4)$weights)
sample_hh_5_weighted <- sum(filter(households_df, sizes == 5)$weights)
sample_hh_6_weighted <- sum(filter(households_df, sizes == 6)$weights)
sample_hh_7_weighted <- sum(filter(households_df, sizes == 7)$weights)
sample_hh_8_weighted <- sum(filter(households_df, sizes == 8)$weights)
sample_hh_9_weighted <- sum(filter(households_df, sizes == 9)$weights)
sample_hh_10_weighted <- sum(filter(households_df, sizes == 10)$weights)
sample_hh_1_weighted_percentage <- 100*sample_hh_1_weighted/sum(households_df$weights)
sample_hh_2_weighted_percentage <- 100*sample_hh_2_weighted/sum(households_df$weights)
sample_hh_3_weighted_percentage <- 100*sample_hh_3_weighted/sum(households_df$weights)
sample_hh_4_weighted_percentage <- 100*sample_hh_4_weighted/sum(households_df$weights)
sample_hh_5_weighted_percentage <- 100*sample_hh_5_weighted/sum(households_df$weights)
sample_hh_6_weighted_percentage <- 100*sample_hh_6_weighted/sum(households_df$weights)
sample_hh_7_weighted_percentage <- 100*sample_hh_7_weighted/sum(households_df$weights)
sample_hh_8_weighted_percentage <- 100*sample_hh_8_weighted/sum(households_df$weights)
sample_hh_9_weighted_percentage <- 100*sample_hh_9_weighted/sum(households_df$weights)
sample_hh_10_weighted_percentage <- 100*sample_hh_10_weighted/sum(households_df$weights)
sample_household_weighted_percentages <- c(sample_hh_1_weighted_percentage,
sample_hh_2_weighted_percentage,
sample_hh_3_weighted_percentage,
sample_hh_4_weighted_percentage,
sample_hh_5_weighted_percentage,
sample_hh_6_weighted_percentage,
sample_hh_7_weighted_percentage,
sample_hh_8_weighted_percentage,
sample_hh_9_weighted_percentage,
sample_hh_10_weighted_percentage)
sum(sample_household_weighted_percentages)
census_household_percentages-sample_household_weighted_percentages
sum(mics_household_percentages-sample_household_weighted_percentages)
raking_data <- c(sample_dem_percentages, sample_gov_percentages,
sample_household_percentages,
pop_dem_percentages, pop_gov_percentages,
mics_household_percentages,
sample_dem_weighted_percentages, sample_gov_weighted_percentages,
sample_household_weighted_percentages)
raking_matrix <- matrix(raking_data, ncol = 3)
df_raking <- as.data.frame(raking_matrix)
df_raking <- mutate(df_raking,
df_raking[,2] - df_raking[,3])
categories <- c("Male 0-4", "Female 0-4", "Male 5-14",
"Female 5-14", "Male 15-17", "Female 15-17",
"Male 18-29", "Female 18-29", "Male 30-39",
"Female 30-39", "Male 40-64",
"Female 40-64", "Male 65+", "Female 65+",
"North Gaza", "Gaza City","Deir al Balah",
"Khan Younis", "Rafah",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10+"
)
df_raking <- mutate(df_raking, categories)
colnames(df_raking) <- c("Sample Percentages", "Population Percentages",
"Raked Sample Percentages", "Population - Raked Sample",
"Categories")
df_raking <- relocate(df_raking, "Categories")
raking_table <- gt(df_raking)%>%
tab_header(
title = "Table S7: Comparison of Population Values with Sample and Raked Sample Values",
subtitle = "By Demographics, Governorate and Household Size"
) %>%
fmt_number(n_sigfig = 3)%>%
tab_row_group(label = "Household Sizes",
rows = c(20:29)) %>%
tab_row_group(label = "Governorates",
rows = c(15:19))%>%
tab_row_group(label = "Ages/Sexes",
rows = c(1:14))
gtsave(raking_table, "raking_table.png")
summary(df_raking$`Population - Raked Sample`)
#Raked percentages are very close to population percentages with the exception
#of for 10+ households which differ by about 2 percentage points so it is
#worth checking whether the 10+ households have substantially different
#death rates in the unweighted sample than the other households. The violent
#death rate for the 10+ households is 3.1% compared to 4.0 for the other
#households so upweighting the 10+ households would decrease the violent
#death estimate but not by much since their are few 10+ households and
#a slightly upweight would not matter much.
100*nrow(filter(jon, hsize > 9 & Violent == 1))/
nrow(filter(jon, hsize > 9))
100*nrow(filter(jon, hsize < 10 & Violent == 1))/
nrow(filter(jon, hsize < 10))