# # # # # # # # # # # # # # # # # IT ANALYSIS # # # # # # # # # # # # # # # # # # # PACKAGES USED library(rstatix) library(dplyr) library(car) library(lsr) library(pastecs) # # DATA eptic_it <- read.csv("texts_with_sttr_it.csv") summary(eptic_it) # direction text_type sttr # Length:69 Length:69 Min. :0.4290 # Class :character Class :character 1st Qu.:0.4730 # Mode :character Mode :character Median :0.4940 # Mean :0.4897 # 3rd Qu.:0.5030 # Max. :0.5590 # # OUTLIERS # Identifying outliers outliers <- eptic_it %>% group_by(text_type) %>% identify_outliers(sttr) # https://www.rdocumentation.org/packages/rstatix/versions/0.7.2/topics/identify_outliers # Removing outliers eptic_it <- eptic_it %>% anti_join(outliers, by = c("text_type", "direction", "sttr")) # https://search.r-project.org/CRAN/refmans/dplyr/html/filter-joins.html # # DESCRIPTIVES stats <- by(eptic_it$sttr, eptic_it$text_type, stat.desc) # https://www.rdocumentation.org/packages/pastecs/versions/1.3.21/topics/stat.desc rounded_stats <- lapply(stats, round, 3) rounded_stats # $it_sp_st # nbr.val nbr.null nbr.na min max range sum median mean SE.mean CI.mean.0.95 var std.dev coef.var # 14.000 0.000 0.000 0.473 0.510 0.037 6.947 0.498 0.496 0.003 0.006 0.000 0.010 0.020 # $it_sp_tt # nbr.val nbr.null nbr.na min max range sum median mean SE.mean CI.mean.0.95 var std.dev coef.var # 17.000 0.000 0.000 0.429 0.508 0.079 7.805 0.465 0.459 0.006 0.012 0.001 0.023 0.050 # $it_wr_st # nbr.val nbr.null nbr.na min max range sum median mean SE.mean CI.mean.0.95 var std.dev coef.var # 17.000 0.000 0.000 0.463 0.538 0.075 8.488 0.503 0.499 0.005 0.010 0.000 0.019 0.038 # $it_wr_tt # nbr.val nbr.null nbr.na min max range sum median mean SE.mean CI.mean.0.95 var std.dev coef.var # 17.000 0.000 0.000 0.463 0.527 0.064 8.453 0.495 0.497 0.004 0.009 0.000 0.018 0.036 # # NORMALITY TESTS # Checking normality of the STTRs, by text_type tapply(eptic_it$sttr, eptic_it$text_type, shapiro.test) # $it_sp_st # data: X[[i]] # W = 0.9421, p-value = 0.4459 # $it_sp_tt # data: X[[i]] # W = 0.92009, p-value = 0.1482 # $it_wr_st # data: X[[i]] # W = 0.96484, p-value = 0.7235 # $it_wr_tt # data: X[[i]] # W = 0.96958, p-value = 0.811 # # VARIANCES leveneTest(eptic_it$sttr ~ eptic_it$text_type) # Levene's Test for Homogeneity of Variance (center = median) # Df F value Pr(>F) # group 3 2.511 0.06696 . # 61 # # BOXPLOT boxplot(eptic_it$sttr ~ eptic_it$text_type, main = "Boxplots of STTR by Text Type", xlab = "Text Type", ylab = "STTR", col = c(gray(0.9), gray(0.7), gray(0.5), gray(0.3))) # # ANOVA anova_italian <- aov(eptic_it$sttr ~ eptic_it$text_type) # ANOVA assumptions are met so we can use aov() summary(anova_italian) # Df Sum Sq Mean Sq F value Pr(>F) # text_type 3 0.01873 0.006244 18.42 1.27e-08 *** # Residuals 61 0.02068 0.000339 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # # ANOVA EFFECT SIZE (OMEGA SQUARED) # The function is from here https://stats.stackexchange.com/questions/2962/omega-squared-for-measure-of-effect-in-r omega_sq <- function(aovm){ sum_stats <- summary(aovm)[[1]] SSm <- sum_stats[["Sum Sq"]][1] SSr <- sum_stats[["Sum Sq"]][2] DFm <- sum_stats[["Df"]][1] MSr <- sum_stats[["Mean Sq"]][2] W2 <- (SSm-DFm*MSr)/(SSm+SSr+MSr) return(W2) } omega_sq(anova_italian) # 0.4456538 # # T-TESTS (UNCORRECTED) t.test(eptic_it$sttr[eptic_it$text_type=="it_sp_tt"], eptic_it$sttr[eptic_it$text_type=="it_wr_tt"], alternative = "two.sided", var.equal = TRUE) # t = -5.3921, df = 32, p-value = 6.349e-06 t.test(eptic_it$sttr[eptic_it$text_type=="it_sp_st"], eptic_it$sttr[eptic_it$text_type=="it_wr_st"], alternative = "two.sided", var.equal = TRUE) # t = -0.54585, df = 29, p-value = 0.5893 t.test(eptic_it$sttr[eptic_it$text_type=="it_sp_tt"], eptic_it$sttr[eptic_it$text_type=="it_sp_st"], alternative = "two.sided", var.equal = TRUE) # t = -5.6272, df = 29, p-value = 4.449e-06 t.test(eptic_it$sttr[eptic_it$text_type=="it_wr_tt"], eptic_it$sttr[eptic_it$text_type=="it_wr_st"], alternative = "two.sided", var.equal = TRUE) # t = -0.32368, df = 32, p-value = 0.7483 # # BONF. CORRECTIONS p.adjust(6.349e-06, method='bonferroni', n=4) # 2.5396e-05 p.adjust(0.5893, method='bonferroni', n=4) # 1 p.adjust(4.449e-06, method='bonferroni', n=4) # 1.7796e-05 p.adjust(0.7483, method='bonferroni', n=4) # 1 # # T-TESTS EFFECT SIZE (COHEN'S D) cohensD(eptic_it$sttr[eptic_it$text_type=="it_sp_tt"], eptic_it$sttr[eptic_it$text_type=="it_wr_tt"]) # 1.849465 cohensD(eptic_it$sttr[eptic_it$text_type=="it_sp_st"], eptic_it$sttr[eptic_it$text_type=="it_wr_st"]) # 0.1970006 cohensD(eptic_it$sttr[eptic_it$text_type=="it_sp_tt"], eptic_it$sttr[eptic_it$text_type=="it_sp_st"]) # 2.030874 cohensD(eptic_it$sttr[eptic_it$text_type=="it_wr_tt"], eptic_it$sttr[eptic_it$text_type=="it_wr_st"]) # 0.1110215