# # # # # # # # # # # # # # # # #
# EN ANALYSIS
# # # # # # # # # # # # # # # # #
#
# Compares the standardised type-token ratio (STTR) of the four English
# text types (en_sp_st, en_sp_tt, en_wr_st, en_wr_tt): outlier removal,
# descriptives, assumption checks, one-way ANOVA with omega squared, and
# four pairwise t-tests with Bonferroni correction and Cohen's d.
# Commented values below each call are the outputs recorded from the
# original run of this script.

# # PACKAGES USED
library(rstatix)
library(dplyr)
library(car)
library(lsr)
library(pastecs)

# # DATA
eptic_en <- read.csv("texts_with_sttr_en.csv")
summary(eptic_en)
#   direction          text_type              sttr
#  Length:72          Length:72          Min.   :0.3820
#  Class :character   Class :character   1st Qu.:0.4146
#  Mode  :character   Mode  :character   Median :0.4274
#                                        Mean   :0.4295
#                                        3rd Qu.:0.4441
#                                        Max.   :0.4839

# # OUTLIERS
# Identifying outliers
# https://www.rdocumentation.org/packages/rstatix/versions/0.7.2/topics/identify_outliers
outliers <- eptic_en %>%
  group_by(text_type) %>%
  identify_outliers(sttr)

# Removing outliers
# https://search.r-project.org/CRAN/refmans/dplyr/html/filter-joins.html
eptic_en <- eptic_en %>%
  anti_join(outliers, by = c("text_type", "direction", "sttr"))

# text_type is read in as character (see the summary above). Converting it
# to a factor once lets the formula-based calls below use it as a grouping
# variable directly; car::leveneTest() otherwise coerces a non-factor group
# with a warning.
eptic_en$text_type <- factor(eptic_en$text_type)

# STTR values split by text type, reused by the t-tests and effect sizes.
sttr_by_type <- split(eptic_en$sttr, eptic_en$text_type)

# # DESCRIPTIVES
# https://www.rdocumentation.org/packages/pastecs/versions/1.3.21/topics/stat.desc
stats <- by(eptic_en$sttr, eptic_en$text_type, stat.desc)
rounded_stats <- lapply(stats, round, 3)
rounded_stats
# $en_sp_st
# nbr.val nbr.null nbr.na    min    max  range    sum median   mean SE.mean CI.mean.0.95    var std.dev coef.var
#  20.000    0.000  0.000  0.382  0.478  0.096  8.517  0.422  0.426   0.006        0.012  0.001   0.025    0.058
# $en_sp_tt
# nbr.val nbr.null nbr.na    min    max  range    sum median   mean SE.mean CI.mean.0.95    var std.dev coef.var
#  15.000    0.000  0.000  0.387  0.428  0.041  6.151  0.408  0.410   0.003        0.007  0.000   0.013    0.031
# $en_wr_st
# nbr.val nbr.null nbr.na    min    max  range    sum median   mean SE.mean CI.mean.0.95    var std.dev coef.var
#  18.000    0.000  0.000  0.403  0.477  0.074  7.867  0.435  0.437   0.004        0.009  0.000   0.019    0.043
# $en_wr_tt
# nbr.val nbr.null nbr.na    min    max  range    sum median   mean SE.mean CI.mean.0.95    var std.dev coef.var
#  17.000    0.000  0.000  0.421  0.484  0.063  7.552  0.442  0.444   0.004        0.008  0.000   0.016    0.037

# # NORMALITY TESTS
# Checking normality of the STTRs, by text_type
tapply(eptic_en$sttr, eptic_en$text_type, shapiro.test)
# $en_sp_st
# data:  X[[i]]
# W = 0.97619, p-value = 0.876
# $en_sp_tt
# data:  X[[i]]
# W = 0.95245, p-value = 0.5638
# $en_wr_st
# data:  X[[i]]
# W = 0.94034, p-value = 0.2937
# $en_wr_tt
# data:  X[[i]]
# W = 0.9447, p-value = 0.3782

# # VARIANCES
leveneTest(sttr ~ text_type, data = eptic_en)
# Levene's Test for Homogeneity of Variance (center = median)
#       Df F value Pr(>F)
# group  3  2.0487 0.1155
#       66

# # BOXPLOT
boxplot(
  sttr ~ text_type,
  data = eptic_en,
  main = "Boxplots of STTR by Text Type",
  xlab = "Text Type",
  ylab = "STTR",
  col = gray(c(0.9, 0.7, 0.5, 0.3))
)

# # ANOVA
# ANOVA assumptions are met so we can use aov()
anova_english <- aov(sttr ~ text_type, data = eptic_en)
summary(anova_english)
#             Df  Sum Sq  Mean Sq F value   Pr(>F)
# text_type    3 0.01064 0.003547   9.584 2.45e-05 ***
# Residuals   66 0.02442 0.000370
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# # ANOVA EFFECT SIZE (OMEGA SQUARED)
# The function is from here https://stats.stackexchange.com/questions/2962/omega-squared-for-measure-of-effect-in-r
#
# Omega squared for a one-way aov() fit.
#   aovm: a fitted aov object whose summary table has the model term in
#         row 1 and the residuals in row 2.
# Returns (SS_model - df_model * MS_resid) / (SS_model + SS_resid + MS_resid)
# as a single number.
omega_sq <- function(aovm) {
  anova_table <- summary(aovm)[[1]]
  ss_model <- anova_table[["Sum Sq"]][1]
  ss_resid <- anova_table[["Sum Sq"]][2]
  df_model <- anova_table[["Df"]][1]
  ms_resid <- anova_table[["Mean Sq"]][2]
  (ss_model - df_model * ms_resid) / (ss_model + ss_resid + ms_resid)
}

omega_sq(anova_english)
# 0.2689463

# # T-TESTS (UNCORRECTED)
# Each test is stored so that its exact p-value can be corrected below
# instead of retyping the rounded value printed on screen.
t_sp_tt_vs_wr_tt <- t.test(
  sttr_by_type[["en_sp_tt"]],
  sttr_by_type[["en_wr_tt"]],
  alternative = "two.sided",
  var.equal = TRUE
)
t_sp_tt_vs_wr_tt
# t = -6.5267, df = 30, p-value = 3.231e-07

t_sp_st_vs_wr_st <- t.test(
  sttr_by_type[["en_sp_st"]],
  sttr_by_type[["en_wr_st"]],
  alternative = "two.sided",
  var.equal = TRUE
)
t_sp_st_vs_wr_st
# t = -1.5494, df = 36, p-value = 0.13

t_sp_tt_vs_sp_st <- t.test(
  sttr_by_type[["en_sp_tt"]],
  sttr_by_type[["en_sp_st"]],
  alternative = "two.sided",
  var.equal = TRUE
)
t_sp_tt_vs_sp_st
# t = -2.242, df = 33, p-value = 0.0318

t_wr_tt_vs_wr_st <- t.test(
  sttr_by_type[["en_wr_tt"]],
  sttr_by_type[["en_wr_st"]],
  alternative = "two.sided",
  var.equal = TRUE
)
t_wr_tt_vs_wr_st
# t = 1.194, df = 33, p-value = 0.241

# # BONF. CORRECTIONS
# https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/p.adjust
# All four p-values are corrected in one call, so the number of comparisons
# is taken from the length of the vector (4).
raw_p_values <- c(
  sp_tt_vs_wr_tt = t_sp_tt_vs_wr_tt$p.value,
  sp_st_vs_wr_st = t_sp_st_vs_wr_st$p.value,
  sp_tt_vs_sp_st = t_sp_tt_vs_sp_st$p.value,
  wr_tt_vs_wr_st = t_wr_tt_vs_wr_st$p.value
)
p.adjust(raw_p_values, method = "bonferroni")
# Recorded from the original run, which corrected the rounded p-values
# printed above; results from the unrounded p-values may differ in the
# trailing digits.
# sp_tt_vs_wr_tt: 1.2924e-06
# sp_st_vs_wr_st: 0.52
# sp_tt_vs_sp_st: 0.1272
# wr_tt_vs_wr_st: 0.964

# # T-TESTS EFFECT SIZE (COHEN'S D)
cohensD(sttr_by_type[["en_sp_tt"]], sttr_by_type[["en_wr_tt"]])
# 2.312066
cohensD(sttr_by_type[["en_sp_st"]], sttr_by_type[["en_wr_st"]])
# 0.5033751
cohensD(sttr_by_type[["en_sp_tt"]], sttr_by_type[["en_sp_st"]])
# 0.7657748
cohensD(sttr_by_type[["en_wr_tt"]], sttr_by_type[["en_wr_st"]])
# 0.4038092