---
title: "01_Analysis_Main"
author: "Camille"
date: "3/1/2023"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
Generates code for main analysis (total PA and PA entrance) and aggregate analysis robustness check
```{r Load data and package}
#load packages
library(tidyverse)
library(lfe)
getwd()
#load data
master_full <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_full.csv")
master_full$Tourism <- master_full$Tourism/1000
master_full$PercentDeforest <- master_full$PercentDeforest*100
master_full <- master_full %>% filter(Year >= 2001)
master_full$precipitation <- master_full$precipitation*12
master_full$Year <- as.character(master_full$Year)
master_commune <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_commune.csv")
master_commune$Tourism <- master_commune$Tourism/1000
master_commune$PercentDeforest <- master_commune$PercentDeforest*100
master_commune$precipitation <- master_commune$precipitation*12
master_commune <- master_commune %>% filter(Year >= 2001)
master_agg <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/aggregate_master.csv")
master_agg$precipitation <- master_agg$precipitation*12
master_full_funding <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_full_funding.csv")
master_commune_funding <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_commune_funding.csv")
master_full_funding$precipitation <- master_full_funding$precipitation*12
master_commune_funding$precipitation <- master_commune_funding$precipitation*12
```
```{r Full Analysis}
#MAIN ANALYSIS
fem_full_formula <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full <- felm(fem_full_formula, data = master_full)
summary_dd_reg_full <- summary(dd_reg_full)
dd_reg_full_coef <- as.data.frame(summary_dd_reg_full$coefficients)
dd_reg_full_coef$model <- "main"
100*dd_reg_full_coef[1,1]/mean(master_full$PercentDeforest[master_full$Year>2000])
#Full analysis without precipitation
fem_full_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensity +OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full2 <- felm(fem_full_formula2, data = master_full)
summary_dd_reg_full2 <- summary(dd_reg_full2)
dd_reg_full2_coef <- as.data.frame(summary_dd_reg_full2$coefficients)
dd_reg_full2_coef$model <- "No_Precip"
#FULL ANALYSIS WITH funding
#subset the data to only include 2004-2011
master_full_fund <- master_full %>% filter(Year <=2012& Year >= 2005, Name != "Anjanaharibe-Sud", Name != "Pic d'Ivohibe")
fem_full_formula_fund <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj+ funding| Year + Name | 0 | Regions")
dd_reg_full_fund <- felm(fem_full_formula_fund, data = master_full_fund)
summary_dd_reg_full_fund <- summary(dd_reg_full_fund)
dd_reg_full_fund_coef <- as.data.frame(summary_dd_reg_full_fund$coefficients)
dd_reg_full_fund_coef$model <- "funding"
#subset the data to only include 2004-2011, do not include funding
fem_full_formula_fund2 <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_full_fund2 <- felm(fem_full_formula_fund2, data = master_full_fund)
summary_dd_reg_full_fund2 <- summary(dd_reg_full_fund2)
dd_reg_full_fund_coef2 <- as.data.frame(summary_dd_reg_full_fund2$coefficients)
dd_reg_full_fund_coef2$model <- "funding2"
```
```{r Commune Analysis }
#commune ANALYSIS WITHOUT INTERACTION
master_commune <- master_commune %>% filter(Name !="Andringitra")
fem_commune_formula <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune + precipitation +OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_commune <- felm(fem_commune_formula, data = master_commune)
summary(dd_reg_commune)
summary_dd_reg_commune <- summary(dd_reg_commune)
dd_reg_commune_coef <- as.data.frame(summary_dd_reg_commune$coefficients)
dd_reg_commune_coef$model <- "main"
100*dd_reg_commune_coef[1,1]/mean(master_commune$PercentDeforest[master_commune$Year>2000])
#commune analysis without precipitation
fem_commune_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune +OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_commune2 <- felm(fem_commune_formula2, data = master_commune)
summary_dd_reg_commune2 <- summary(dd_reg_commune2)
dd_reg_commune2_coef <- as.data.frame(summary_dd_reg_commune2$coefficients)
dd_reg_commune2_coef$model <- "commune"
#commune ANALYSIS WITH FUNDING
#subset the data to only include 2004-2011
master_commune_fund <- master_commune %>% filter(Year <=2011& Year >= 2004, Name != "Anjanaharibe-Sud", Name != "Pic d'Ivohibe")
fem_commune_formula_fund <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune + precipitation +OilPriceAdj+ funding| Year + Name | 0 | Regions")
dd_reg_commune_fund <- felm(fem_commune_formula_fund, data = master_commune_fund)
summary(dd_reg_commune_fund)
summary_dd_reg_commune_fund <- summary(dd_reg_commune_fund)
dd_reg_commune_fund_coef <- as.data.frame(summary_dd_reg_commune_fund$coefficients)
dd_reg_commune_fund_coef$model <- "fund"
```
```{r Aggregate Analysis}
#Full park data
master_agg$Tourism <- master_agg$Tourism/1000
master_agg$PercentDeforest <- master_agg$PercentDeforest*100
#agg ANALYSIS FULL
fem_agg_formula <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation+OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_agg <- felm(fem_agg_formula, data = master_agg)
summary_dd_reg_agg <- summary(dd_reg_agg)
dd_reg_agg_coef <- as.data.frame(summary_dd_reg_agg$coefficients)
dd_reg_agg_coef$model <- "main"
100*dd_reg_agg_coef[1,1]/mean(master_agg$PercentDeforest)
#agg analysis without precipitation
fem_agg_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensity +OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_agg2 <- felm(fem_agg_formula2, data = master_agg)
summary_dd_reg_agg2 <- summary(dd_reg_agg2)
dd_reg_agg2_coef <- as.data.frame(summary_dd_reg_agg2$coefficients)
dd_reg_agg2_coef$model <- "no_precipitation"
```
```{Lagged Tourism Effects}
Tourism_1YearBefore <- read.csv("DataKeep/Tourism_1YearBefore.csv")
Tourism_1YearBefore2 <- Tourism_1YearBefore %>%
pivot_longer(!Name, names_to = "Year", values_to = "Tourism_1YearBefore")
Tourism_1YearBefore2$Year <- substr(Tourism_1YearBefore2$Year, 2, nchar(Tourism_1YearBefore2$Year))
master_full <- left_join(master_full, Tourism_1YearBefore2, by = c("Name", "Year"))
Tourism_5YearsBefore <- read.csv("DataKeep/Tourism_5YearsBefore.csv")
Tourism_5YearsBefore2 <- Tourism_5YearsBefore %>%
pivot_longer(!Name, names_to = "Year", values_to = "Tourism_5YearsBefore")
Tourism_5YearsBefore2$Year <- substr(Tourism_5YearsBefore2$Year, 2, nchar(Tourism_5YearsBefore2$Year))
master_full <- left_join(master_full, Tourism_5YearsBefore2, by = c("Name", "Year"))
fem_full_formula_Tourismlag1 <- as.formula("PercentDeforest ~ Tourism + Tourism_1YearBefore+ PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_Tourismlag1 <- felm(fem_full_formula_Tourismlag1, data = master_full)
summary(dd_reg_full_Tourismlag1)
fem_full_formula_Tourismlag5 <- as.formula("PercentDeforest ~ Tourism + Tourism_5YearsBefore+ PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_Tourismlag5 <- felm(fem_full_formula_Tourismlag5, data = master_full)
summary(dd_reg_full_Tourismlag5)
```
```{r Wrangle Dataframes}
#Main Analysis
main_models <- rbind(dd_reg_full_coef, dd_reg_full2_coef, dd_reg_full_fund_coef, dd_reg_full_fund_coef2)
main_models$Variable <- rownames(main_models)
main_models4 <- main_models[1:4,]
main_models4$model <- "PA"
main_models4$`Pr(>|t|)` <- round(main_models4$`Pr(>|t|)`, digits = 3)
main_models4$Estimate <- round(main_models4$Estimate, digits = 3)
main_models4$`Cluster s.e.` <- round(main_models4$`Cluster s.e.`, digits = 3)
main_models4$`t value` <- round(main_models4$`t value`, digits = 3)
main_sensitivity4 <- main_models[-c(1:4),]
main_sensitivity4$`Pr(>|t|)` <- round(main_sensitivity4$`Pr(>|t|)`, digits = 3)
main_sensitivity4$Estimate <- round(main_sensitivity4$Estimate, digits = 3)
main_sensitivity4$`Cluster s.e.` <- round(main_sensitivity4$`Cluster s.e.`, digits = 3)
main_sensitivity4$`t value` <- round(main_sensitivity4$`t value`, digits = 3)
main_sensitivity4$Model <- "PA"
#commune analysis
commune_models <- rbind(dd_reg_commune_coef, dd_reg_commune2_coef, dd_reg_commune_fund_coef)
commune_models$Variable <- rownames(commune_models)
commune_models4 <- commune_models[1:4,]
commune_models4$model <- "Commune"
commune_models4$`Pr(>|t|)` <- round(commune_models4$`Pr(>|t|)`, digits = 3)
commune_models4$Estimate <- round(commune_models4$Estimate, digits = 3)
commune_models4$`Cluster s.e.` <- round(commune_models4$`Cluster s.e.`, digits = 3)
commune_models4$`t value` <- round(commune_models4$`t value`, digits = 3)
commune_sensitivity4 <- commune_models[-c(1:4),]
commune_sensitivity4$`Pr(>|t|)` <- round(commune_sensitivity4$`Pr(>|t|)`, digits = 3)
commune_sensitivity4$Estimate <- round(commune_sensitivity4$Estimate, digits = 3)
commune_sensitivity4$`Cluster s.e.` <- round(commune_sensitivity4$`Cluster s.e.`, digits = 3)
commune_sensitivity4$`t value` <- round(commune_sensitivity4$`t value`, digits = 3)
commune_sensitivity4$Model <- "Commune"
#aggregate analysis
agg_models <- rbind(dd_reg_agg_coef, dd_reg_agg2_coef)
agg_models$Variable <- rownames(agg_models)
agg_models2 <- agg_models %>% dplyr::select(Variable, Estimate, `Cluster s.e.`, `Pr(>|t|)`, model)
agg_models4 <- agg_models[1:4,]
agg_models4$model <- "Aggregate"
agg_models4$`Pr(>|t|)` <- round(agg_models4$`Pr(>|t|)`, digits = 3)
agg_models4$Estimate <- round(agg_models4$Estimate, digits = 3)
agg_models4$`Cluster s.e.` <- round(agg_models4$`Cluster s.e.`, digits = 3)
agg_models4$`t value` <- round(agg_models4$`t value`, digits = 3)
agg_sensitivity4 <- agg_models[-c(1:4),]
agg_sensitivity4$`Pr(>|t|)` <- round(agg_sensitivity4$`Pr(>|t|)`, digits = 3)
agg_sensitivity4$Estimate <- round(agg_sensitivity4$Estimate, digits = 3)
agg_sensitivity4$`Cluster s.e.` <- round(agg_sensitivity4$`Cluster s.e.`, digits = 3)
agg_sensitivity4$`t value` <- round(agg_sensitivity4$`t value`, digits = 3)
agg_sensitivity4$Model <- "Aggregate"
regression_table <- rbind(main_models4, commune_models4)
regression_table <- regression_table %>% dplyr::select(model, Variable, Estimate, `Cluster s.e.`, `t value`, `Pr(>|t|)`) %>% rename(Analysis = model)
regression_table$Variable[regression_table$Variable=="precipitation"] <- "Precipitation"
regression_table$Variable[regression_table$Variable=="PopDensity"] <- "Pop. Density"
regression_table$Variable[regression_table$Variable=="OilPriceAdj"] <- "Oil Price"
regression_table$Variable[regression_table$Variable=="PopDensityCommune"] <- "Pop. Density"
sensitivity_table <- rbind(main_sensitivity4)
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation1"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation2"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation3"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity1"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity2"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity3"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity4"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune1"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune2"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune3"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune4"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj1"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj2"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj3"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj4"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="interaction"] <- "Interaction"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism1"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism2"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism3"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism4"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="funding"] <- "Funding"
sensitivity_table$model[sensitivity_table$model=="No_Precip"] <- "No Precip."
sensitivity_table$model[sensitivity_table$model=="no_precipitation"] <- "No Precip."
sensitivity_table$model[sensitivity_table$model=="funding"] <- "Funding, 2004-2011"
sensitivity_table$model[sensitivity_table$model=="interaction"] <- "Interaction"
sensitivity_table$model[sensitivity_table$model=="fund"] <- "Funding, 2004-2011"
sensitivity_table$model[sensitivity_table$model=="funding2"] <- "No Funding, 2004-2011"
sensitivity_table <- sensitivity_table %>% dplyr::select(-Model)
sensitivity_table <- rbind(agg_models4,sensitivity_table)
regression_table$Location <- "Inside PA"
write.csv(regression_table, "Outputs/regression_table.csv")
write.csv(sensitivity_table, "Outputs/sensitivity_table.csv")
```
```{r Other Precipitation Variables}
#Inside PA
mada_precip_sensitivity <- read.csv("~/Documents/GitHub/TourismDeforestationSubmission/Data/precip_sensitivity.csv")
mada_precip_sensitivity$min <- mada_precip_sensitivity$min *12
mada_precip_sensitivity$max <- mada_precip_sensitivity$max *12
mada_precip_sensitivity$Year <- as.character(mada_precip_sensitivity$Year)
master_commune$Year <- as.character(master_commune$Year)
master_full_precip <- dplyr::left_join(master_full, mada_precip_sensitivity, by=c("Name", "Year"))
fem_full_formula_min <- as.formula("PercentDeforest ~ Tourism + PopDensity + min + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_min <- felm(fem_full_formula_min, data =master_full_precip)
summary_dd_reg_full_min <- summary(dd_reg_full_min)
dd_reg_full_coef_min <- as.data.frame(summary_dd_reg_full_min$coefficients)
dd_reg_full_coef_min$model <- "Min"
fem_full_formula_max <- as.formula("PercentDeforest ~ Tourism + PopDensity + max + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_max <- felm(fem_full_formula_max, data =master_full_precip)
summary_dd_reg_full_max <- summary(dd_reg_full_max)
dd_reg_full_coef_max <- as.data.frame(summary_dd_reg_full_max$coefficients)
dd_reg_full_coef_max$model <- "Max"
fem_full_formula_seasonality <- as.formula("PercentDeforest ~ Tourism + PopDensity + seasonality + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_seasonality <- felm(fem_full_formula_seasonality, data =master_full_precip)
summary_dd_reg_full_seasonality <- summary(dd_reg_full_seasonality)
dd_reg_full_coef_seasonality <- as.data.frame(summary_dd_reg_full_seasonality$coefficients)
dd_reg_full_coef_seasonality$model <- "Seasonality"
precipitation_sensitivity <- rbind(dd_reg_full_coef_min, dd_reg_full_coef_max, dd_reg_full_coef_seasonality)
precipitation_sensitivity$Variable <- rep(c("Tourism", "Pop. Density", "Precipitation", "Oil Price"),3)
precipitation_sensitivity$Location <- "Inside PA"
precipitation_sensitivity$`Pr(>|t|)` <- round(precipitation_sensitivity$`Pr(>|t|)`, digits = 3)
precipitation_sensitivity$Estimate <- round(precipitation_sensitivity$Estimate, digits = 3)
precipitation_sensitivity$`Cluster s.e.` <- round(precipitation_sensitivity$`Cluster s.e.`, digits = 3)
precipitation_sensitivity$`t value` <- round(precipitation_sensitivity$`t value`, digits = 3)
write.csv(precipitation_sensitivity, "Outputs/precipitation_sensitivity.csv")
```