TourismDeforestationSubmission / 01a_Analysis_Main.Rmd
01a_Analysis_Main.Rmd
Raw
---
title: "01_Analysis_Main"
author: "Camille"
date: "3/1/2023"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Generates code for main analysis (total PA and PA entrance) and aggregate analysis robustness check

```{r Load data and package}
#load packages
library(tidyverse)
library(lfe)
getwd()

#load data 
master_full <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_full.csv")
master_full$Tourism <- master_full$Tourism/1000
master_full$PercentDeforest <- master_full$PercentDeforest*100
master_full <- master_full %>% filter(Year >= 2001)
master_full$precipitation <- master_full$precipitation*12
master_full$Year <- as.character(master_full$Year)
master_commune <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_commune.csv")
master_commune$Tourism <- master_commune$Tourism/1000
master_commune$PercentDeforest <- master_commune$PercentDeforest*100
master_commune$precipitation <- master_commune$precipitation*12
master_commune <- master_commune %>% filter(Year >= 2001)
master_agg <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/aggregate_master.csv")
master_agg$precipitation <- master_agg$precipitation*12

master_full_funding <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_full_funding.csv")
master_commune_funding <- read.csv("~/Documents/GitHub/TourismDeforestation2/Data/MasterSheet_commune_funding.csv")

master_full_funding$precipitation <- master_full_funding$precipitation*12
master_commune_funding$precipitation <- master_commune_funding$precipitation*12

```

```{r Full Analysis}
#MAIN ANALYSIS 
fem_full_formula <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full <- felm(fem_full_formula, data = master_full)
summary_dd_reg_full <- summary(dd_reg_full)
dd_reg_full_coef <- as.data.frame(summary_dd_reg_full$coefficients)
dd_reg_full_coef$model <- "main"

 100*dd_reg_full_coef[1,1]/mean(master_full$PercentDeforest[master_full$Year>2000])

#Full analysis without precipitation
fem_full_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensity +OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full2 <- felm(fem_full_formula2, data = master_full)
summary_dd_reg_full2 <- summary(dd_reg_full2)
dd_reg_full2_coef <- as.data.frame(summary_dd_reg_full2$coefficients)
dd_reg_full2_coef$model <- "No_Precip"

#FULL ANALYSIS WITH funding 
#subset the data to only include 2004-2011
master_full_fund <- master_full %>% filter(Year <=2012& Year >= 2005, Name != "Anjanaharibe-Sud", Name != "Pic d'Ivohibe")
fem_full_formula_fund <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj+ funding| Year + Name | 0 | Regions")
dd_reg_full_fund <- felm(fem_full_formula_fund, data = master_full_fund)
summary_dd_reg_full_fund <- summary(dd_reg_full_fund)
dd_reg_full_fund_coef <- as.data.frame(summary_dd_reg_full_fund$coefficients)
dd_reg_full_fund_coef$model <- "funding"

#subset the data to only include 2004-2011, do not include funding
fem_full_formula_fund2 <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation + OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_full_fund2 <- felm(fem_full_formula_fund2, data = master_full_fund)
summary_dd_reg_full_fund2 <- summary(dd_reg_full_fund2)
dd_reg_full_fund_coef2 <- as.data.frame(summary_dd_reg_full_fund2$coefficients)
dd_reg_full_fund_coef2$model <- "funding2"

```

```{r Commune Analysis }
#commune ANALYSIS WITHOUT INTERACTION

master_commune <- master_commune %>% filter(Name !="Andringitra")

fem_commune_formula <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune + precipitation +OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_commune <- felm(fem_commune_formula, data = master_commune)
summary(dd_reg_commune)
summary_dd_reg_commune <- summary(dd_reg_commune)
dd_reg_commune_coef <- as.data.frame(summary_dd_reg_commune$coefficients)
dd_reg_commune_coef$model <- "main"

100*dd_reg_commune_coef[1,1]/mean(master_commune$PercentDeforest[master_commune$Year>2000])

#commune analysis without precipitation
fem_commune_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune +OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_commune2 <- felm(fem_commune_formula2, data = master_commune)
summary_dd_reg_commune2 <- summary(dd_reg_commune2)
dd_reg_commune2_coef <- as.data.frame(summary_dd_reg_commune2$coefficients)
dd_reg_commune2_coef$model <- "commune"

#commune ANALYSIS WITH FUNDING 
#subset the data to only include 2004-2011
master_commune_fund <- master_commune %>% filter(Year <=2011& Year >= 2004, Name != "Anjanaharibe-Sud", Name != "Pic d'Ivohibe")
fem_commune_formula_fund <- as.formula("PercentDeforest ~ Tourism + PopDensityCommune + precipitation +OilPriceAdj+ funding| Year + Name | 0 | Regions")
dd_reg_commune_fund <- felm(fem_commune_formula_fund, data = master_commune_fund)
summary(dd_reg_commune_fund)
summary_dd_reg_commune_fund <- summary(dd_reg_commune_fund)
dd_reg_commune_fund_coef <- as.data.frame(summary_dd_reg_commune_fund$coefficients)
dd_reg_commune_fund_coef$model <- "fund"



```

```{r Aggregate Analysis}
#Full park data
master_agg$Tourism <- master_agg$Tourism/1000
master_agg$PercentDeforest <- master_agg$PercentDeforest*100

#agg ANALYSIS FULL
fem_agg_formula <- as.formula("PercentDeforest ~ Tourism + PopDensity + precipitation+OilPriceAdj| Year + Name | 0 | Regions")
dd_reg_agg <- felm(fem_agg_formula, data = master_agg)
summary_dd_reg_agg <- summary(dd_reg_agg)
dd_reg_agg_coef <- as.data.frame(summary_dd_reg_agg$coefficients)
dd_reg_agg_coef$model <- "main"


100*dd_reg_agg_coef[1,1]/mean(master_agg$PercentDeforest)


#agg analysis without precipitation
fem_agg_formula2 <- as.formula("PercentDeforest ~ Tourism + PopDensity +OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_agg2 <- felm(fem_agg_formula2, data = master_agg)
summary_dd_reg_agg2 <- summary(dd_reg_agg2)
dd_reg_agg2_coef <- as.data.frame(summary_dd_reg_agg2$coefficients)
dd_reg_agg2_coef$model <- "no_precipitation"


```

```{Lagged Tourism Effects}
Tourism_1YearBefore <- read.csv("DataKeep/Tourism_1YearBefore.csv")
Tourism_1YearBefore2 <- Tourism_1YearBefore  %>%
  pivot_longer(!Name, names_to = "Year", values_to = "Tourism_1YearBefore")
Tourism_1YearBefore2$Year <- substr(Tourism_1YearBefore2$Year, 2, nchar(Tourism_1YearBefore2$Year))

master_full <- left_join(master_full, Tourism_1YearBefore2, by = c("Name", "Year"))

Tourism_5YearsBefore <- read.csv("DataKeep/Tourism_5YearsBefore.csv")
Tourism_5YearsBefore2 <- Tourism_5YearsBefore  %>%
  pivot_longer(!Name, names_to = "Year", values_to = "Tourism_5YearsBefore")
Tourism_5YearsBefore2$Year <- substr(Tourism_5YearsBefore2$Year, 2, nchar(Tourism_5YearsBefore2$Year))

master_full <- left_join(master_full, Tourism_5YearsBefore2, by = c("Name", "Year"))

fem_full_formula_Tourismlag1 <- as.formula("PercentDeforest ~ Tourism + Tourism_1YearBefore+ PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_Tourismlag1 <- felm(fem_full_formula_Tourismlag1, data = master_full)
summary(dd_reg_full_Tourismlag1)

fem_full_formula_Tourismlag5 <- as.formula("PercentDeforest ~ Tourism + Tourism_5YearsBefore+ PopDensity + precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_Tourismlag5 <- felm(fem_full_formula_Tourismlag5, data = master_full)
summary(dd_reg_full_Tourismlag5)

```

```{r Wrangle Dataframes}
#Main Analysis 
main_models <- rbind(dd_reg_full_coef, dd_reg_full2_coef,  dd_reg_full_fund_coef,  dd_reg_full_fund_coef2)
main_models$Variable <- rownames(main_models)

main_models4 <- main_models[1:4,]
main_models4$model <- "PA"
main_models4$`Pr(>|t|)` <- round(main_models4$`Pr(>|t|)`, digits = 3)
main_models4$Estimate <- round(main_models4$Estimate, digits = 3)
main_models4$`Cluster s.e.` <- round(main_models4$`Cluster s.e.`, digits = 3)
main_models4$`t value` <- round(main_models4$`t value`, digits = 3)

main_sensitivity4 <- main_models[-c(1:4),]
main_sensitivity4$`Pr(>|t|)` <- round(main_sensitivity4$`Pr(>|t|)`, digits = 3)
main_sensitivity4$Estimate <- round(main_sensitivity4$Estimate, digits = 3)
main_sensitivity4$`Cluster s.e.` <- round(main_sensitivity4$`Cluster s.e.`, digits = 3)
main_sensitivity4$`t value` <- round(main_sensitivity4$`t value`, digits = 3)
main_sensitivity4$Model <- "PA"


#commune analysis
commune_models <- rbind(dd_reg_commune_coef, dd_reg_commune2_coef, dd_reg_commune_fund_coef)
commune_models$Variable <- rownames(commune_models)

commune_models4 <- commune_models[1:4,]
commune_models4$model <- "Commune"
commune_models4$`Pr(>|t|)` <- round(commune_models4$`Pr(>|t|)`, digits = 3)
commune_models4$Estimate <- round(commune_models4$Estimate, digits = 3)
commune_models4$`Cluster s.e.` <- round(commune_models4$`Cluster s.e.`, digits = 3)
commune_models4$`t value` <- round(commune_models4$`t value`, digits = 3)


commune_sensitivity4 <- commune_models[-c(1:4),]
commune_sensitivity4$`Pr(>|t|)` <- round(commune_sensitivity4$`Pr(>|t|)`, digits = 3)
commune_sensitivity4$Estimate <- round(commune_sensitivity4$Estimate, digits = 3)
commune_sensitivity4$`Cluster s.e.` <- round(commune_sensitivity4$`Cluster s.e.`, digits = 3)
commune_sensitivity4$`t value` <- round(commune_sensitivity4$`t value`, digits = 3)
commune_sensitivity4$Model <- "Commune"


#aggregate analysis
agg_models <- rbind(dd_reg_agg_coef, dd_reg_agg2_coef)
agg_models$Variable <- rownames(agg_models)
agg_models2 <- agg_models %>% dplyr::select(Variable, Estimate, `Cluster s.e.`, `Pr(>|t|)`, model)

agg_models4 <- agg_models[1:4,]
agg_models4$model <- "Aggregate"
agg_models4$`Pr(>|t|)` <- round(agg_models4$`Pr(>|t|)`, digits = 3)
agg_models4$Estimate <- round(agg_models4$Estimate, digits = 3)
agg_models4$`Cluster s.e.` <- round(agg_models4$`Cluster s.e.`, digits = 3)
agg_models4$`t value` <- round(agg_models4$`t value`, digits = 3)

agg_sensitivity4 <- agg_models[-c(1:4),]
agg_sensitivity4$`Pr(>|t|)` <- round(agg_sensitivity4$`Pr(>|t|)`, digits = 3)
agg_sensitivity4$Estimate <- round(agg_sensitivity4$Estimate, digits = 3)
agg_sensitivity4$`Cluster s.e.` <- round(agg_sensitivity4$`Cluster s.e.`, digits = 3)
agg_sensitivity4$`t value` <- round(agg_sensitivity4$`t value`, digits = 3)
agg_sensitivity4$Model <- "Aggregate"


regression_table <- rbind(main_models4, commune_models4)
regression_table <- regression_table %>% dplyr::select(model, Variable, Estimate, `Cluster s.e.`, `t value`, `Pr(>|t|)`) %>% rename(Analysis = model)
regression_table$Variable[regression_table$Variable=="precipitation"] <- "Precipitation"
regression_table$Variable[regression_table$Variable=="PopDensity"] <- "Pop. Density"
regression_table$Variable[regression_table$Variable=="OilPriceAdj"] <- "Oil Price"
regression_table$Variable[regression_table$Variable=="PopDensityCommune"] <- "Pop. Density"


sensitivity_table <- rbind(main_sensitivity4)
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation1"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation2"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="precipitation3"] <- "Precipitation"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity1"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity2"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity3"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensity4"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune1"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune2"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune3"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="PopDensityCommune4"] <- "Pop. Density"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj1"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj2"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj3"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="OilPriceAdj4"] <- "Oil Price"
sensitivity_table$Variable[sensitivity_table$Variable=="interaction"] <- "Interaction"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism1"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism2"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism3"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="Tourism4"] <- "Tourism"
sensitivity_table$Variable[sensitivity_table$Variable=="funding"] <- "Funding"
sensitivity_table$model[sensitivity_table$model=="No_Precip"] <- "No Precip."
sensitivity_table$model[sensitivity_table$model=="no_precipitation"] <- "No Precip."
sensitivity_table$model[sensitivity_table$model=="funding"] <- "Funding, 2004-2011"
sensitivity_table$model[sensitivity_table$model=="interaction"] <- "Interaction"
sensitivity_table$model[sensitivity_table$model=="fund"] <- "Funding, 2004-2011"
sensitivity_table$model[sensitivity_table$model=="funding2"] <- "No Funding, 2004-2011"

sensitivity_table <- sensitivity_table %>% dplyr::select(-Model)
sensitivity_table <- rbind(agg_models4,sensitivity_table)

regression_table$Location <- "Inside PA"

write.csv(regression_table, "Outputs/regression_table.csv")
write.csv(sensitivity_table, "Outputs/sensitivity_table.csv")


```

```{r Other Precipitation Variables}
#Inside PA
mada_precip_sensitivity <- read.csv("~/Documents/GitHub/TourismDeforestationSubmission/Data/precip_sensitivity.csv")
mada_precip_sensitivity$min <- mada_precip_sensitivity$min *12
mada_precip_sensitivity$max <- mada_precip_sensitivity$max *12
mada_precip_sensitivity$Year <- as.character(mada_precip_sensitivity$Year)
master_commune$Year <- as.character(master_commune$Year)

master_full_precip <- dplyr::left_join(master_full, mada_precip_sensitivity, by=c("Name", "Year")) 

fem_full_formula_min <- as.formula("PercentDeforest ~ Tourism + PopDensity + min + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_min <- felm(fem_full_formula_min, data =master_full_precip)
summary_dd_reg_full_min <- summary(dd_reg_full_min)
dd_reg_full_coef_min <- as.data.frame(summary_dd_reg_full_min$coefficients)
dd_reg_full_coef_min$model <- "Min"

fem_full_formula_max <- as.formula("PercentDeforest ~ Tourism + PopDensity + max + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_max <- felm(fem_full_formula_max, data =master_full_precip)
summary_dd_reg_full_max <- summary(dd_reg_full_max)
dd_reg_full_coef_max <- as.data.frame(summary_dd_reg_full_max$coefficients)
dd_reg_full_coef_max$model <- "Max"

fem_full_formula_seasonality <- as.formula("PercentDeforest ~ Tourism + PopDensity + seasonality + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_full_seasonality <- felm(fem_full_formula_seasonality, data =master_full_precip)
summary_dd_reg_full_seasonality <- summary(dd_reg_full_seasonality)
dd_reg_full_coef_seasonality <- as.data.frame(summary_dd_reg_full_seasonality$coefficients)
dd_reg_full_coef_seasonality$model <- "Seasonality"

precipitation_sensitivity <- rbind(dd_reg_full_coef_min, dd_reg_full_coef_max, dd_reg_full_coef_seasonality)

precipitation_sensitivity$Variable <- rep(c("Tourism", "Pop. Density", "Precipitation", "Oil Price"),3)
precipitation_sensitivity$Location <- "Inside PA"


precipitation_sensitivity$`Pr(>|t|)` <- round(precipitation_sensitivity$`Pr(>|t|)`, digits = 3)
precipitation_sensitivity$Estimate <- round(precipitation_sensitivity$Estimate, digits = 3)
precipitation_sensitivity$`Cluster s.e.` <- round(precipitation_sensitivity$`Cluster s.e.`, digits = 3)
precipitation_sensitivity$`t value` <- round(precipitation_sensitivity$`t value`, digits = 3)

write.csv(precipitation_sensitivity, "Outputs/precipitation_sensitivity.csv")




```