---
title: "03_HeterogeneityAnalysis"
output: html_document
date: "2024-02-15"
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
TWFE relies on the assumption of linear additive effects. If this assumption is not met, our estimator may be biased. For example, effects may not be additive and linear because we do not control for pre-treatment deforestation. To test this assumption, we add a heterogeneity analysis. To see how impacts vary with number of tourists, we transform the key tourist visit variable to percentage and then interact it with the absolute tourist visit values. We conduct this analysis for the total PA area inside the PA, in the 3km buffer, and in the total PA and 3km buffer combined. We also conduct an analysis where the rival explanations (precipitation, population density, and mean oil price) are also interacted with percentage tourism.
```{r}
#load packages
library(tidyverse)
library(lfe)
#load data
master_full <- read.csv("DataKeep/MasterSheet_full.csv")
master_full$precipitation <- master_full$precipitation*12
master_full$Tourism <- master_full$Tourism/1000
master_full$PercentDeforest <- master_full$PercentDeforest*100
master_buffer_analysis_3km <- read.csv( "DataKeep/master_buffer_analysis_3km.csv")
master_buffer <- master_buffer_analysis_3km
master_buffer_combined <- read.csv("DataKeep/master_buffer_combined.csv")
tourism <- read.csv("~/Documents/GitHub/TourismDeforestation/Data/tourism_deforestation.csv")
master_full$Tourism[is.na(master_full$Tourism)] <- 0
master_buffer$Tourism[is.na(master_buffer$Tourism)] <- 0
master_buffer_combined$Tourism[is.na(master_buffer_combined$Tourism)] <- 0
tourism$Tourism[is.na(tourism$Tourism)] <- 0
```
```{r Categorical}
total_tourism <- master_full_tm1 %>% select(Tourism, Name) %>%group_by(Name) %>% summarise(TotalTourism = sum(Tourism))
sort(total_tourism$TotalTourism)
total_tourism$TourismLevel <- total_tourism$TotalTourism
total_tourism$TourismLevel[total_tourism$TotalTourism<10 & total_tourism$TotalTourism>0] <- "Medium"
total_tourism$TourismLevel[total_tourism$TotalTourism==0] <- "None"
total_tourism$TourismLevel[total_tourism$TotalTourism>10] <- "High"
total_tourism$TourismLevelMedium <- total_tourism$TourismLevel
total_tourism$TourismLevelMedium[total_tourism$TourismLevelMedium=="Medium"] <- "aMedium"
master_full_cat <- left_join(master_full_tm1,total_tourism, by="Name")
#Inside
fem_formula_tourismcat <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevel + PopDensity + precipitation+ OilPriceAdj + PopDensity + precipitation+OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismcat <- felm(fem_formula_tourismcat, data = master_full_cat)
summary_dd_reg_tourismcat <- summary(dd_reg_tourismcat)
dd_reg_full_coef_tourismcat <- as.data.frame(summary_dd_reg_tourismcat$coefficients)
dd_reg_full_coef_tourismcat$Location <- "Inside"
dd_reg_full_coef_tourismcat$Variable <- rownames(dd_reg_full_coef_tourismcat)
fem_formula_tourismcat2 <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevelMedium + PopDensity + precipitation+ OilPriceAdj + PopDensity + precipitation+OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismcat2 <- felm(fem_formula_tourismcat2, data = master_full_cat)
summary_dd_reg_tourismcat2 <- summary(dd_reg_tourismcat2)
dd_reg_full_coef_tourismcat2 <- as.data.frame(summary_dd_reg_tourismcat2$coefficients)
dd_reg_full_coef_tourismcat2$Location <- "Inside"
#no significant effect of tourism proportion inside PA but it is near to marginally significant and the effect size is consistent with when there is no covseraction, covseraction terms not significant
#Buffer
master_full_cat_buffer <- left_join(master_buffer_tm1,total_tourism, by="Name")
fem_formula_tourismprop_buffercovs <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevel + PopDensity + Precipitation+ OilPriceAdj+ PopDensity + Precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismprop_buffercovs <- felm(fem_formula_tourismprop_buffercovs, data = master_full_cat_buffer)
summary_dd_reg_tourismprop_buffercovs <- summary(dd_reg_tourismprop_buffercovs)
dd_reg_full_coef_tourismprop_buffercovs <- as.data.frame(summary_dd_reg_tourismprop_buffercovs$coefficients)
dd_reg_full_coef_tourismprop_buffercovs$Location <- "Buffer"
dd_reg_full_coef_tourismprop_buffercovs$Variable <- rownames(dd_reg_full_coef_tourismprop_buffercovs)
fem_formula_tourismprop_buffercovs2 <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevelMedium + PopDensity + Precipitation+ OilPriceAdj+ PopDensity + Precipitation + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismprop_buffercovs2 <- felm(fem_formula_tourismprop_buffercovs2, data = master_full_cat_buffer)
summary_dd_reg_tourismprop_buffercovs2 <- summary(dd_reg_tourismprop_buffercovs2)
dd_reg_full_coef_tourismprop_buffercovs2 <- as.data.frame(summary_dd_reg_tourismprop_buffercovs2$coefficients)
dd_reg_full_coef_tourismprop_buffercovs2$Location <- "Buffer"
#significant effect of tourism proportion in PA buffer, interaction terms not significant
#Combined
master_full_cat_combined <- left_join(master_buffer_combined_tm1,total_tourism, by="Name")
fem_formula_tourismprop_buffer_combinedcovs <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevel + PopDensity+ combined_precip + OilPriceAdj + PopDensity +combined_precip + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismprop_buffer_combinedcovs <- felm(fem_formula_tourismprop_buffer_combinedcovs, data = master_full_cat_combined)
summary_dd_reg_tourismprop_buffer_combinedcovs <- summary(dd_reg_tourismprop_buffer_combinedcovs)
dd_reg_full_coef_tourismprop_buffer_combinedcovs <- as.data.frame(summary_dd_reg_tourismprop_buffer_combinedcovs$coefficients)
dd_reg_full_coef_tourismprop_buffer_combinedcovs$Location <- "Combined"
dd_reg_full_coef_tourismprop_buffer_combinedcovs$Variable <- rownames(dd_reg_full_coef_tourismprop_buffer_combinedcovs)
fem_formula_tourismprop_buffer_combinedcovs2 <- as.formula("PercentDeforest ~ Tourism + Tourism:TourismLevelMedium + PopDensity+ combined_precip + OilPriceAdj + PopDensity +combined_precip + OilPriceAdj | Year + Name | 0 | Regions")
dd_reg_tourismprop_buffer_combinedcovs2 <- felm(fem_formula_tourismprop_buffer_combinedcovs2, data = master_full_cat_combined)
summary_dd_reg_tourismprop_buffer_combinedcovs2 <- summary(dd_reg_tourismprop_buffer_combinedcovs2)
dd_reg_full_coef_tourismprop_buffer_combinedcovs2 <- as.data.frame(summary_dd_reg_tourismprop_buffer_combinedcovs2$coefficients)
dd_reg_full_coef_tourismprop_buffer_combinedcovs2$Location <- "Combined"
#no significant effect of tourism proportion in PA buffer combined with inside PA, covseraction terms not significant
categorical_df <- rbind(dd_reg_full_coef_tourismcat, dd_reg_full_coef_tourismprop_buffercovs, dd_reg_full_coef_tourismprop_buffer_combinedcovs)
categorical_df$Variable[categorical_df$Variable=="precipitation"] <- "Precipitation"
categorical_df$Variable[categorical_df$Variable=="OilPriceAdj"] <- "Oil Price"
categorical_df <- categorical_df %>% select(Location, Variable, Estimate, `Cluster s.e.`, `t value`, `Pr(>|t|)`)
categorical_df$Estimate <- round(categorical_df$Estimate, 3)
categorical_df$`Cluster s.e.` <- round(categorical_df$`Cluster s.e.`, 3)
categorical_df$`t value` <- round(categorical_df$`t value`, 3)
categorical_df$`Pr(>|t|)` <- round(categorical_df$`Pr(>|t|)`, 3)
write.csv(categorical_df, "Outputs/heterogeneity_analysis.csv")
```