#functions required for visualization #read_and_merge_file needs dataframe *.softclips.parsed and table "gene_biotype" in the following format: #| ensembl_gene_id | gene_biotype | transcript_start | transcript_end | chromosome_name | external_gene_name | strand | #strand 1 (positive) or -1 (negative) read_and_merge_file <- function(f) { df = read.csv(f, sep ='\t', header = FALSE,na.strings=c("", " ","NA")) print("File read") colnames(df)[1] <- "id" colnames(df)[2] <- "clip5" colnames(df)[3] <- "clip3" colnames(df)[4] <- "pos" colnames(df)[5] <- "chr" merge_soft_clip_R = sqldf(" SELECT * FROM df d1 JOIN gene_biotype d2 ON d1.chr = d2.chromosome_name AND d1.pos BETWEEN CAST(d2.transcript_start AS int) AND (d2.transcript_end) ") print("Files merged") merge_soft_clip_R$chromosome_name <- NULL return(merge_soft_clip_R) } #Funktion decode decode <- function(gene) { gene$clip3_tail_len_decoded <- case_when( gene$clip3_tail==1000 ~ "other", gene$clip3_tail==1003 ~ "poliAU in reads and shorter reads", gene$clip3_tail==2000 ~ "no tail", gene$clip3_tail==1002 ~ "poliA in reads and shorter reads", gene$clip3_tail==1001 ~ "poliU in reads and shorter reads", gene$clip3_tail>=10 & gene$clip3_tail<1000 ~ "10+", TRUE ~ as.character(gene$clip3_tail) ) gene$tail <- case_when( gene$clip3_tail==1000 ~ 0, gene$clip3_tail==1003 ~ 1, gene$clip3_tail==2000 ~ 0, gene$clip3_tail==1002 ~ 1, gene$clip3_tail==1001 ~ 1, gene$clip3_tail>=10 & gene$clip3_tail<1000 ~ 1, TRUE ~ 1 ) gene$clip3_tail_len_range <- case_when( gene$clip3_tail_len>=1 & gene$clip3_tail_len<=10 ~ "1-10", gene$clip3_tail_len>=11 & gene$clip3_tail_len<=20 ~ "11-20", gene$clip3_tail_len>=21 & gene$clip3_tail_len<=30 ~ "21-30", gene$clip3_tail_len>=31 & gene$clip3_tail_len<=40 ~ "31-40", gene$clip3_tail_len>=41 & gene$clip3_tail_len<=50 ~ "41-50", gene$clip3_tail_len>=51 & gene$clip3_tail_len<=60 ~ "51-60", gene$clip3_tail_len>=61 & gene$clip3_tail_len<2000 ~ "61+", gene$clip3_tail_len == 0 ~ "0", TRUE ~ as.character(gene$clip3_tail_len) ) return(gene) } #Function tail_analysis_for_genes_0t #tails are read from the reads which 3'end is present in chosen window #here window has size from transcript_end to transcript_end - 24nt tail_analysis_for_genes_0t <- function (gene_name, df2) { gene <- subset(df2, external_gene_name == gene_name) gene$clip3<- ifelse(is.na(gene$clip3),as.character(0),gene$clip3) gene$clip3_tail <- case_when( #no tail at all gene$clip3=="0" ~ case_when( #no tail gene$pos == gene$transcript_end ~ 2000, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ 2000, gene$pos != gene$transcript_end ~ 1000 ), str_count(gene$clip3,"T")!=0 & str_count(gene$clip3,"T") == nchar(gene$clip3) ~ case_when( #U-tail gene$pos == gene$transcript_end ~ 1001, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ 1001, gene$pos != gene$transcript_end ~ 1000 ), str_count(gene$clip3,"A")!=0 & str_count(gene$clip3,"A") == nchar(gene$clip3) ~ case_when( #A-tail gene$pos == gene$transcript_end ~ 1002, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ 1002, gene$pos != gene$transcript_end ~ 1000 ), str_count(gene$clip3, regex("[^GC]")) & str_count(gene$clip3, regex("^A")) & str_count(gene$clip3, regex("T$")) & str_count(gene$clip3,"A|T")!=0 & str_count(gene$clip3,"A|T") == nchar(gene$clip3) ~ case_when( #AU-tail gene$pos == gene$transcript_end ~ 1003, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ 1003, gene$pos != gene$transcript_end ~ 1000 ), #other TRUE ~ case_when( #heterogenous tail gene$pos == gene$transcript_end ~ 1000, gene$pos != gene$transcript_end ~ 1000 ) ) gene$clip3_tail_len <- case_when( #no tail at all gene$clip3=="0" ~ case_when( #no tail gene$pos == gene$transcript_end ~ 0, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ 0, gene$pos != gene$transcript_end ~ 0 ), str_count(gene$clip3,"T")!=0 & str_count(gene$clip3,"T") == nchar(gene$clip3) ~ case_when( #U-tail gene$pos == gene$transcript_end ~ nchar(gene$clip3)+0, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ nchar(gene$clip3)+0, gene$pos != gene$transcript_end ~ nchar(gene$clip3)+0 ), str_count(gene$clip3,"A")!=0 & str_count(gene$clip3,"A") == nchar(gene$clip3) ~ case_when( #A-tail gene$pos == gene$transcript_end ~ nchar(gene$clip3)+0, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ nchar(gene$clip3)+0, gene$pos != gene$transcript_end ~ nchar(gene$clip3)+0 ), str_count(gene$clip3, regex("[^GC]")) & str_count(gene$clip3, regex("^A")) & str_count(gene$clip3, regex("T$")) & str_count(gene$clip3,"A|T")!=0 & str_count(gene$clip3,"A|T") == nchar(gene$clip3) ~ case_when( #AU-tail gene$pos == gene$transcript_end ~ nchar(gene$clip3)+0, gene$pos < gene$transcript_end & gene$pos >= gene$transcript_end-24 ~ nchar(gene$clip3)+0, gene$pos != gene$transcript_end ~ nchar(gene$clip3)+0 ), #other TRUE ~ case_when( #heterogenous tail gene$pos == gene$transcript_end ~ nchar(gene$clip3)+0, gene$pos != gene$transcript_end ~ nchar(gene$clip3)+0 ) ) gene$clip3_tail_len_UinAU <- str_count(str_extract(gene$clip3, regex("T*$"))) gene <- decode(gene) return(gene) } #Function for tails length ploting #tail_code: "1002", polyA; "1003", polyAU; "1001", polyU. gene_name in "" tail_length_fun <- function(tail_code, gene_name, tail_name){ len = len_LINE1_NGS4[len_LINE1_NGS4$clip3_tail == tail_code,] percent_len <- data.frame(no_row=c(nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_10",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_10",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_10",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_12",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_12",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_12",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_13",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_13",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_13",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_14",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_14",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_14",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_16",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_16",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_16",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_18",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_18",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_18",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_19",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_19",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_19",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_20",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_20",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_20",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_21",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_21",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_21",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_22",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_22",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_22",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_23",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_23",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_23",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_24",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_24",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_24",])), tail_class=c(rep(c("short (<=32)", "mid (33-64)", "long (>64)"),12)), sample=c(rep("WT_10",3),rep("WT_12",3),rep("WT_13",3),rep("X1KO_14",3),rep("X1KO_16",3),rep("X1KO_18",3),rep("D2KO_19",3),rep("D2KO_20",3),rep("D2KO_21",3),rep("D2X1KO_22",3),rep("D2X1KO_23",3),rep("D2X1KO_24",3))) p1 = ggplot(percent_len, aes(fill=tail_class, y=no_row, x=sample)) + geom_bar(position="fill", stat="identity") + ggtitle(paste(gene_name,tail_name)) + theme(axis.text.x=element_text(angle=45, hjust=1), axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + scale_fill_manual(values=c("short (<=32)" = "#AED66B", "mid (33-64)" = "#FD8D3C", "long (>64)" = "#6BAED6"), name = "Tail length", breaks = c("long (>64)", "mid (33-64)", "short (<=32)")) + ylab("Percentage of tail lengths") + scale_x_discrete(limits = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p2 = ggplot(len, aes(x=clip3_tail_len, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p3 = ggplot(len[len$clip3_tail_len <= 64,], aes(x=clip3_tail_len, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p4 = ggplot(len, aes(x = clip3_tail_len, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) p4_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) p5 = ggplot(len, aes(x = clip3_tail_len, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p5_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) test1 <- c((nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_10",])*100)/nrow(len[len$sample == "WT_10",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_12",])*100)/nrow(len[len$sample == "WT_12",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT_13",])*100)/nrow(len[len$sample == "WT_13",]) ) test2 <- c((nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_14",])*100)/nrow(len[len$sample == "X1KO_14",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_16",])*100)/nrow(len[len$sample == "X1KO_16",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO_18",])*100)/nrow(len[len$sample == "X1KO_18",]) ) test3 <- c((nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_19",])*100)/nrow(len[len$sample == "D2KO_19",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_20",])*100)/nrow(len[len$sample == "D2KO_20",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO_21",])*100)/nrow(len[len$sample == "D2KO_21",]) ) test4 <- c((nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_22",])*100)/nrow(len[len$sample == "D2X1KO_22",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_23",])*100)/nrow(len[len$sample == "D2X1KO_23",]), (nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO_24",])*100)/nrow(len[len$sample == "D2X1KO_24",]) ) if(mean(test1) != mean(test2)){ test32 <- t.test(test1,test2) } else { test32 <- "t.test not performed - equel values" } if(mean(test1) != mean(test3)){ test33 <- t.test(test1,test3) } else { test33 <- "t.test not performed - equel values" } if(mean(test1) != mean(test4)){ test34 <- t.test(test1,test4) } else { test34 <- "t.test not performed - equel values" } if(mean(test2) != mean(test3)){ test35 <- t.test(test2,test3) } else { test35 <- "t.test not performed - equel values" } if(mean(test2) != mean(test4)){ test36 <- t.test(test2,test4) } else { test36 <- "t.test not performed - equel values" } if(mean(test3) != mean(test4)){ test37 <- t.test(test3,test4) } else { test37 <- "t.test not performed - equel values" } test1 <- c((nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_10",])*100)/nrow(len[len$sample == "WT_10",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_12",])*100)/nrow(len[len$sample == "WT_12",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT_13",])*100)/nrow(len[len$sample == "WT_13",]) ) test2 <- c((nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_14",])*100)/nrow(len[len$sample == "X1KO_14",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_16",])*100)/nrow(len[len$sample == "X1KO_16",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO_18",])*100)/nrow(len[len$sample == "X1KO_18",]) ) test3 <- c((nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_19",])*100)/nrow(len[len$sample == "D2KO_19",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_20",])*100)/nrow(len[len$sample == "D2KO_20",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO_21",])*100)/nrow(len[len$sample == "D2KO_21",]) ) test4 <- c((nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_22",])*100)/nrow(len[len$sample == "D2X1KO_22",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_23",])*100)/nrow(len[len$sample == "D2X1KO_23",]), (nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO_24",])*100)/nrow(len[len$sample == "D2X1KO_24",]) ) if(mean(test1) != mean(test2)){ test42 <- t.test(test1,test2) } else { test42 <- "t.test not performed - equel values" } if(mean(test1) != mean(test3)){ test43 <- t.test(test1,test3) } else { test43 <- "t.test not performed - equel values" } if(mean(test1) != mean(test4)){ test44 <- t.test(test1,test4) } else { test44 <- "t.test not performed - equel values" } if(mean(test2) != mean(test3)){ test45 <- t.test(test2,test3) } else { test45 <- "t.test not performed - equel values" } if(mean(test2) != mean(test4)){ test46 <- t.test(test2,test4) } else { test46 <- "t.test not performed - equel values" } if(mean(test3) != mean(test4)){ test47 <- t.test(test3,test4) } else { test47 <- "t.test not performed - equel values" } test1 <- c((nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_10",])*100)/nrow(len[len$sample == "WT_10",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_12",])*100)/nrow(len[len$sample == "WT_12",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "WT_13",])*100)/nrow(len[len$sample == "WT_13",]) ) test2 <- c((nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_14",])*100)/nrow(len[len$sample == "X1KO_14",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_16",])*100)/nrow(len[len$sample == "X1KO_16",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO_18",])*100)/nrow(len[len$sample == "X1KO_18",]) ) test3 <- c((nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_19",])*100)/nrow(len[len$sample == "D2KO_19",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_20",])*100)/nrow(len[len$sample == "D2KO_20",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO_21",])*100)/nrow(len[len$sample == "D2KO_21",]) ) test4 <- c((nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_22",])*100)/nrow(len[len$sample == "D2X1KO_22",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_23",])*100)/nrow(len[len$sample == "D2X1KO_23",]), (nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO_24",])*100)/nrow(len[len$sample == "D2X1KO_24",]) ) if(mean(test1) != mean(test2)){ test62 <- t.test(test1,test2) } else { test62 <- "t.test not performed - equel values" } if(mean(test1) != mean(test3)){ test63 <- t.test(test1,test3) } else { test63 <- "t.test not performed - equel values" } if(mean(test1) != mean(test4)){ test64 <- t.test(test1,test4) } else { test64 <- "t.test not performed - equel values" } if(mean(test2) != mean(test3)){ test65 <- t.test(test2,test3) } else { test65 <- "t.test not performed - equel values" } if(mean(test2) != mean(test4)){ test66 <- t.test(test2,test4) } else { test66 <- "t.test not performed - equel values" } if(mean(test3) != mean(test4)){ test67 <- t.test(test3,test4) } else { test67 <- "t.test not performed - equel values" } print(p1) print("T-test for short tails, WT vs X1KO") print(test32) print("T-test for short tails, WT vs D2KO") print(test33) print("T-test for short tails, WT vs D2X1KO") print(test34) print("T-test for short tails, X1KO vs D2KO") print(test35) print("T-test for short tails, X1KO vs D2X1KO") print(test36) print("T-test for short tails, D2KO vs D2X1KO") print(test37) print("T-test for mid tails, WT vs X1KO") print(test42) print("T-test for mid tails, WT vs D2KO") print(test43) print("T-test for mid tails, WT vs D2X1KO") print(test44) print("T-test for mid tails, X1KO vs D2KO") print(test45) print("T-test for mid tails, X1KO vs D2X1KO") print(test46) print("T-test for mid tails, D2KO vs D2X1KO") print(test47) print("T-test for long tails, WT vs X1KO") print(test62) print("T-test for long tails, WT vs D2KO") print(test63) print("T-test for long tails, WT vs D2X1KO") print(test64) print("T-test for long tails, X1KO vs D2KO") print(test65) print("T-test for long tails, X1KO vs D2X1KO") print(test66) print("T-test for long tails, D2KO vs D2X1KO") print(test67) print(p2) print(p3) print(p4) print(p4_1) print(p5) print(p5_1) if(tail_code == "1003"){ p6 = ggplot(len[len$clip3_tail_len <= 64,], aes(x=clip3_tail_len_UinAU, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p6_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len_UinAU, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_y_discrete(limits = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) p6_2 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len_UinAU, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_color_discrete(breaks = c("WT_10","WT_12","WT_13","X1KO_14","X1KO_16","X1KO_18","D2KO_19","D2KO_20","D2KO_21","D2X1KO_22","D2X1KO_23","D2X1KO_24")) print(p6) print(p6_1) print(p6_2) } } #Function for mean tails length ploting #tail_code: "1002", polyA; "1003", polyAU; "1001", polyU. gene_name in "" tail_length_mean_fun <- function(tail_code, gene_name, tail_name){ len = len_LINE1_NGS4_mean[len_LINE1_NGS4_mean$clip3_tail == tail_code,] percent_len <- data.frame(no_row=c(nrow(len[len$clip3_tail_len <= 32 & len$sample == "WT",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "WT",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "WT",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "X1KO",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "X1KO",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "X1KO",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2KO",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2KO",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2KO",]), nrow(len[len$clip3_tail_len <= 32 & len$sample == "D2X1KO",]), nrow(len[len$clip3_tail_len > 32 & len$clip3_tail_len <= 64 & len$sample == "D2X1KO",]), nrow(len[len$clip3_tail_len > 64 & len$sample == "D2X1KO",]) ), tail_class=c(rep(c("short (<=32)", "mid (33-64)", "long (>64)"),2 )), sample=c(rep("WT",3),rep("X1KO",3),rep("D2KO",3),rep("D2X1KO",3))) print("Median tail length in WT") medWT = len[len$sample == "WT",] print(median(medWT$clip3_tail_len)) print("Median tail length in X1KO") medX1KO = len[len$sample == "X1KO",] print(median(medX1KO$clip3_tail_len)) print("Median tail length in D2KO") medD2KO = len[len$sample == "D2KO",] print(median(medD2KO$clip3_tail_len)) print("Median tail length in D2X1KO") medD2X1KO = len[len$sample == "D2X1KO",] print(median(medD2X1KO$clip3_tail_len)) print("Mean tail length in WT") print(mean(medWT$clip3_tail_len)) print("Mean tail length in X1KO") print(mean(medX1KO$clip3_tail_len)) print("Mean tail length in D2KO") print(mean(medD2KO$clip3_tail_len)) print("Mean tail length in D2X1KO") print(mean(medD2X1KO$clip3_tail_len)) if(tail_code == "1003"){ print("Median U length in AU tail in WT") print(median(medWT$clip3_tail_len_UinAU)) print("Median U length in AU tail in X1KO") print(median(medX1KO$clip3_tail_len_UinAU)) print("Median U length in AU tail in D2KO") print(median(medD2KO$clip3_tail_len_UinAU)) print("Median U length in AU tail in D2X1KO") print(median(medD2X1KO$clip3_tail_len_UinAU)) print("Mean U length in AU tail in WT") print(mean(medWT$clip3_tail_len_UinAU)) print("Mean U length in AU tail in X1KO") print(mean(medX1KO$clip3_tail_len_UinAU)) print("Mean U length in AU tail in D2KO") print(mean(medD2KO$clip3_tail_len_UinAU)) print("Mean U length in AU tail in D2X1KO") print(mean(medD2X1KO$clip3_tail_len_UinAU)) } p1 = ggplot(percent_len, aes(fill=tail_class, y=no_row, x=sample)) + geom_bar(position="fill", stat="identity") + ggtitle(paste(gene_name,tail_name)) + theme(axis.text.x=element_text(angle=45, hjust=1), axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + scale_fill_manual(values=c("short (<=32)" = "#AED66B", "mid (33-64)" = "#FD8D3C", "long (>64)" = "#6BAED6"), name = "Tail length", breaks = c("long (>64)", "mid (33-64)", "short (<=32)")) + ylab("Percentage of tail lengths") + scale_x_discrete(limits = c("WT","X1KO","D2KO","D2X1KO")) p2 = ggplot(len, aes(x=clip3_tail_len, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) p3 = ggplot(len[len$clip3_tail_len <= 64,], aes(x=clip3_tail_len, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) p4 = ggplot(len, aes(x = clip3_tail_len, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + # bandwidth = 0.1 -wygladza linie plotu ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_fill_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8")) p4_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_fill_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8")) p5 = ggplot(len, aes(x = clip3_tail_len, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) p5_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,tail_name)) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab(paste(tail_name,"tail length")) + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) print(p1) print(p2) print(p3) print(p4) print(p4_1) print(p5) print(p5_1) if(tail_code == "1003"){ p6 = ggplot(len[len$clip3_tail_len <= 64,], aes(x=clip3_tail_len_UinAU, color=sample)) + geom_density(adjust = 2) + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) p6_1 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len_UinAU, y = sample)) + geom_density_ridges(aes(fill = sample), scale = 0.9, quantile_lines = TRUE) + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8")) p6_2 = ggplot(len[len$clip3_tail_len <= 64,], aes(x = clip3_tail_len_UinAU, color=sample)) + stat_ecdf() + ggtitle(paste(gene_name,"U in polyAU")) + theme(axis.title=element_text(size=14), axis.text=element_text(size=12), legend.title=element_text(size=14), legend.text=element_text(size=12)) + xlab("length of U in polyAU") + scale_color_manual(values=c("#28C8CC", "#F88B84", "#28CC2C", "#CC28C8"), limits = c("WT","X1KO","D2KO","D2X1KO")) print(p6) print(p6_1) print(p6_2) } } #Function for percentage calculation percents_fun = function(sample_name,column){ nrow <- nrow(sample_name) nrow_woOther <- nrow(sample_name[sample_name$clip3_tail!="1000",]) nrow_poliU <- nrow(sample_name[sample_name$clip3_tail=="1001",]) nrow_poliA <- nrow(sample_name[sample_name$clip3_tail=="1002",]) nrow_poliAU <- nrow(sample_name[sample_name$clip3_tail=="1003",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail=="2000",]) percent_poliU <- nrow_poliU*100/nrow_woOther percent_poliA <- nrow_poliA*100/nrow_woOther percent_poliAU <- nrow_poliAU*100/nrow_woOther percent_noTail <- nrow_noTail*100/nrow_woOther percent_xxx <- data.frame(percent=c(percent_poliU,percent_poliA,percent_poliAU,percent_noTail), tail=c("U tail","A tail","AU tail","no tail"), sample=c(rep(column,4))) return(percent_xxx) } #Function for number of reads calculation nreads_fun = function(sample_name){ nrow <- nrow(sample_name) nrow_woOther <- nrow(sample_name[sample_name$clip3_tail!="1000",]) nrow_poliU <- nrow(sample_name[sample_name$clip3_tail=="1001",]) nrow_poliA <- nrow(sample_name[sample_name$clip3_tail=="1002",]) nrow_poliAU <- nrow(sample_name[sample_name$clip3_tail=="1003",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail=="2000",]) nreads_xxx = data.frame( all = nrow, discarded = nrow - nrow_woOther, poliA = nrow_poliA, poliAU = nrow_poliAU, poliU = nrow_poliU, no_tail = nrow_noTail ) return(nreads_xxx) } #Function to percentage of tails in length range calculation #column in "" percents_bins_fun <- function(sample_name, column){ nrow <- nrow(sample_name) nrow_woOther <- nrow(sample_name[sample_name$clip3_tail!="1000",]) nrow_poliU_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliU_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliU_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliU_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliU_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliU_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliU_61 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="61+",]) nrow_poliA_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliA_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliA_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliA_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliA_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliA_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliA_61 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="61+",]) nrow_poliAU_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliAU_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliAU_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliAU_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliAU_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliAU_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliAU_61 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="61+",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail=="2000",]) percent_poliU_1_10 <- nrow_poliU_1_10*100/(nrow_poliU_1_10+nrow_poliA_1_10+nrow_poliAU_1_10) percent_poliU_1_10[is.na(percent_poliU_1_10)] <- 0 percent_poliA_1_10 <- nrow_poliA_1_10*100/(nrow_poliU_1_10+nrow_poliA_1_10+nrow_poliAU_1_10) percent_poliA_1_10[is.na(percent_poliA_1_10)] <- 0 percent_poliAU_1_10 <- nrow_poliAU_1_10*100/(nrow_poliU_1_10+nrow_poliA_1_10+nrow_poliAU_1_10) percent_poliAU_1_10[is.na(percent_poliAU_1_10)] <- 0 percent_poliU_11_20 <- nrow_poliU_11_20*100/(nrow_poliU_11_20+nrow_poliA_11_20+nrow_poliAU_11_20) percent_poliU_11_20[is.na(percent_poliU_11_20)] <- 0 percent_poliA_11_20 <- nrow_poliA_11_20*100/(nrow_poliU_11_20+nrow_poliA_11_20+nrow_poliAU_11_20) percent_poliA_11_20[is.na(percent_poliA_11_20)] <- 0 percent_poliAU_11_20 <- nrow_poliAU_11_20*100/(nrow_poliU_11_20+nrow_poliA_11_20+nrow_poliAU_11_20) percent_poliAU_11_20[is.na(percent_poliAU_11_20)] <- 0 percent_poliU_21_30 <- nrow_poliU_21_30*100/(nrow_poliU_21_30+nrow_poliA_21_30+nrow_poliAU_21_30) percent_poliU_21_30[is.na(percent_poliU_21_30)] <- 0 percent_poliA_21_30 <- nrow_poliA_21_30*100/(nrow_poliU_21_30+nrow_poliA_21_30+nrow_poliAU_21_30) percent_poliA_21_30[is.na(percent_poliA_21_30)] <- 0 percent_poliAU_21_30 <- nrow_poliAU_21_30*100/(nrow_poliU_21_30+nrow_poliA_21_30+nrow_poliAU_21_30) percent_poliAU_21_30[is.na(percent_poliAU_21_30)] <- 0 percent_poliU_31_40 <- nrow_poliU_31_40*100/(nrow_poliU_31_40+nrow_poliA_31_40+nrow_poliAU_31_40) percent_poliU_31_40[is.na(percent_poliU_31_40)] <- 0 percent_poliA_31_40 <- nrow_poliA_31_40*100/(nrow_poliU_31_40+nrow_poliA_31_40+nrow_poliAU_31_40) percent_poliA_31_40[is.na(percent_poliA_31_40)] <- 0 percent_poliAU_31_40 <- nrow_poliAU_31_40*100/(nrow_poliU_31_40+nrow_poliA_31_40+nrow_poliAU_31_40) percent_poliAU_31_40[is.na(percent_poliAU_31_40)] <- 0 percent_poliU_41_50 <- nrow_poliU_41_50*100/(nrow_poliU_41_50+nrow_poliA_41_50+nrow_poliAU_41_50) percent_poliU_41_50[is.na(percent_poliU_41_50)] <- 0 percent_poliA_41_50 <- nrow_poliA_41_50*100/(nrow_poliU_41_50+nrow_poliA_41_50+nrow_poliAU_41_50) percent_poliA_41_50[is.na(percent_poliA_41_50)] <- 0 percent_poliAU_41_50 <- nrow_poliAU_41_50*100/(nrow_poliU_41_50+nrow_poliA_41_50+nrow_poliAU_41_50) percent_poliAU_41_50[is.na(percent_poliAU_41_50)] <- 0 percent_poliU_51_60 <- nrow_poliU_51_60*100/(nrow_poliU_51_60+nrow_poliA_51_60+nrow_poliAU_51_60) percent_poliU_51_60[is.na(percent_poliU_51_60)] <- 0 percent_poliA_51_60 <- nrow_poliA_51_60*100/(nrow_poliU_51_60+nrow_poliA_51_60+nrow_poliAU_51_60) percent_poliA_51_60[is.na(percent_poliA_51_60)] <- 0 percent_poliAU_51_60 <- nrow_poliAU_51_60*100/(nrow_poliU_51_60+nrow_poliA_51_60+nrow_poliAU_51_60) percent_poliAU_51_60[is.na(percent_poliAU_51_60)] <- 0 percent_poliU_61 <- nrow_poliU_61*100/(nrow_poliU_61+nrow_poliA_61+nrow_poliAU_61) percent_poliU_61[is.na(percent_poliU_61)] <- 0 percent_poliA_61 <- nrow_poliA_61*100/(nrow_poliU_61+nrow_poliA_61+nrow_poliAU_61) percent_poliA_61[is.na(percent_poliA_61)] <- 0 percent_poliAU_61 <- nrow_poliAU_61*100/(nrow_poliU_61+nrow_poliA_61+nrow_poliAU_61) percent_poliAU_61[is.na(percent_poliAU_61)] <- 0 percent_xxx <- data.frame(percent=c(percent_poliU_1_10,percent_poliA_1_10,percent_poliAU_1_10, percent_poliU_11_20,percent_poliA_11_20,percent_poliAU_11_20, percent_poliU_21_30,percent_poliA_21_30,percent_poliAU_21_30, percent_poliU_31_40,percent_poliA_31_40,percent_poliAU_31_40, percent_poliU_41_50,percent_poliA_41_50,percent_poliAU_41_50, percent_poliU_51_60,percent_poliA_51_60,percent_poliAU_51_60, percent_poliU_61,percent_poliA_61,percent_poliAU_61), tail=c(rep(c("U tail","A tail","AU tail"),7)), range=(c(rep("1-10",3),rep("11-20",3),rep("21-30",3),rep("31-40",3),rep("41-50",3),rep("51-60",3),rep("61+",3))), sample=c(rep(column,21))) return(percent_xxx) } #Function to number of reads in length range calculation #column in "" nreads_bins_fun <- function(sample_name, column){ nrow <- nrow(sample_name) nrow_woOther <- nrow(sample_name[sample_name$clip3_tail!="1000",]) nrow_poliU_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliU_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliU_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliU_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliU_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliU_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliU_61 <- nrow(sample_name[sample_name$clip3_tail=="1001" & sample_name$clip3_tail_len_range=="61+",]) nrow_poliA_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliA_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliA_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliA_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliA_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliA_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliA_61 <- nrow(sample_name[sample_name$clip3_tail=="1002" & sample_name$clip3_tail_len_range=="61+",]) nrow_poliAU_1_10 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="1-10",]) nrow_poliAU_11_20 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="11-20",]) nrow_poliAU_21_30 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="21-30",]) nrow_poliAU_31_40 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="31-40",]) nrow_poliAU_41_50 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="41-50",]) nrow_poliAU_51_60 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="51-60",]) nrow_poliAU_61 <- nrow(sample_name[sample_name$clip3_tail=="1003" & sample_name$clip3_tail_len_range=="61+",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail=="2000",]) nreads_xxx = data.frame( poliA_1_10 = nrow_poliA_1_10, poliA_11_20 = nrow_poliA_11_20, poliA_21_30 = nrow_poliA_21_30, poliA_31_40 = nrow_poliA_31_40, poliA_41_50 = nrow_poliA_41_50, poliA_51_60 = nrow_poliA_51_60, poliA_61 = nrow_poliA_61, poliAU_1_10 = nrow_poliAU_1_10, poliAU_11_20 = nrow_poliAU_11_20, poliAU_21_30 = nrow_poliAU_21_30, poliAU_31_40 = nrow_poliAU_31_40, poliAU_41_50 = nrow_poliAU_41_50, poliAU_51_60 = nrow_poliAU_51_60, poliAU_61 = nrow_poliAU_61, poliU_1_10 = nrow_poliU_1_10, poliU_11_20 = nrow_poliU_11_20, poliU_21_30 = nrow_poliU_21_30, poliU_31_40 = nrow_poliU_31_40, poliU_41_50 = nrow_poliU_41_50, poliU_51_60 = nrow_poliU_51_60, poliU_61 = nrow_poliU_61 ) nreads_xxx = t(nreads_xxx) colnames(nreads_xxx) = column return(nreads_xxx) } #Function to percentage of U in AU tails in length range #column in "" percents_AU_bins_fun <- function(sample_name, column){ sample_name$clip3_tail_U_len_range <- case_when( sample_name$clip3_tail_len_UinAU == 1 ~ "1", sample_name$clip3_tail_len_UinAU == 2 ~ "2", sample_name$clip3_tail_len_UinAU>=3 & sample_name$clip3_tail_len_UinAU<=4 ~ "3-4", sample_name$clip3_tail_len_UinAU>=5 & sample_name$clip3_tail_len_UinAU<=7 ~ "5-7", sample_name$clip3_tail_len_UinAU>=8 & sample_name$clip3_tail_len_UinAU<2000 ~ "8+", sample_name$clip3_tail_len_UinAU == 0 ~ "0", TRUE ~ as.character(sample_name$clip3_tail_len_UinAU) ) sample_name <- sample_name[sample_name$clip3_tail == 1003,] nrow <- nrow(sample_name) nrow_1U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="1-10",]) nrow_1U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="11-20",]) nrow_1U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="21-30",]) nrow_1U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="31-40",]) nrow_1U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="41-50",]) nrow_1U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="51-60",]) nrow_1U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="61+",]) nrow_2U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="1-10",]) nrow_2U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="11-20",]) nrow_2U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="21-30",]) nrow_2U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="31-40",]) nrow_2U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="41-50",]) nrow_2U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="51-60",]) nrow_2U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="61+",]) nrow_34U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="1-10",]) nrow_34U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="11-20",]) nrow_34U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="21-30",]) nrow_34U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="31-40",]) nrow_34U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="41-50",]) nrow_34U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="51-60",]) nrow_34U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="61+",]) nrow_57U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="1-10",]) nrow_57U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="11-20",]) nrow_57U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="21-30",]) nrow_57U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="31-40",]) nrow_57U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="41-50",]) nrow_57U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="51-60",]) nrow_57U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="61+",]) nrow_8U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="1-10",]) nrow_8U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="11-20",]) nrow_8U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="21-30",]) nrow_8U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="31-40",]) nrow_8U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="41-50",]) nrow_8U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="51-60",]) nrow_8U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="61+",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail==2000,]) nrow_xxx <- data.frame(reads=c(nrow_1U_1_10,nrow_2U_1_10,nrow_34U_1_10,nrow_57U_1_10,nrow_8U_1_10, nrow_1U_11_20,nrow_2U_11_20,nrow_34U_11_20,nrow_57U_11_20,nrow_8U_11_20, nrow_1U_21_30,nrow_2U_21_30,nrow_34U_21_30,nrow_57U_21_30,nrow_8U_21_30, nrow_1U_31_40,nrow_2U_31_40,nrow_34U_31_40,nrow_57U_31_40,nrow_8U_31_40, nrow_1U_41_50,nrow_2U_41_50,nrow_34U_41_50,nrow_57U_41_50,nrow_8U_41_50, nrow_1U_51_60,nrow_2U_51_60,nrow_34U_51_60,nrow_57U_51_60,nrow_8U_51_60, nrow_1U_61,nrow_2U_61,nrow_34U_61,nrow_57U_61,nrow_8U_61), U_range=c(rep(c("1","2","3-4","5-7","8+"),7)), range=(c(rep("1-10",5),rep("11-20",5),rep("21-30",5),rep("31-40",5),rep("41-50",5),rep("51-60",5),rep("61+",5))), sample=c(rep(column,35))) nrow_xxx = transform(nrow_xxx, percent = ave(reads, range, FUN = prop.table)) nrow_xxx$percent <- ifelse(is.na(nrow_xxx$percent),0,nrow_xxx$percent) return(nrow_xxx) } #Function to percentage of U in AU tails in length range #column in "" percents_UandAU_bins_fun <- function(sample_name, column){ sample_name$clip3_tail_U_len_range <- case_when( sample_name$clip3_tail_len_UinAU == 1 ~ "1", sample_name$clip3_tail_len_UinAU == 2 ~ "2", sample_name$clip3_tail_len_UinAU>=3 & sample_name$clip3_tail_len_UinAU<=4 ~ "3-4", sample_name$clip3_tail_len_UinAU>=5 & sample_name$clip3_tail_len_UinAU<=7 ~ "5-7", sample_name$clip3_tail_len_UinAU>=8 & sample_name$clip3_tail_len_UinAU<2000 ~ "8+", sample_name$clip3_tail_len_UinAU == 0 ~ "0", TRUE ~ as.character(sample_name$clip3_tail_len_UinAU) ) sample_name = sample_name[sample_name$clip3_tail %in% c(1001,1003),] nrow <- nrow(sample_name) nrow_1U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="1-10",]) nrow_1U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="11-20",]) nrow_1U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="21-30",]) nrow_1U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="31-40",]) nrow_1U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="41-50",]) nrow_1U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="51-60",]) nrow_1U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="1" & sample_name$clip3_tail_len_range=="61+",]) nrow_2U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="1-10",]) nrow_2U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="11-20",]) nrow_2U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="21-30",]) nrow_2U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="31-40",]) nrow_2U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="41-50",]) nrow_2U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="51-60",]) nrow_2U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="2" & sample_name$clip3_tail_len_range=="61+",]) nrow_34U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="1-10",]) nrow_34U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="11-20",]) nrow_34U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="21-30",]) nrow_34U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="31-40",]) nrow_34U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="41-50",]) nrow_34U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="51-60",]) nrow_34U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="3-4" & sample_name$clip3_tail_len_range=="61+",]) nrow_57U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="1-10",]) nrow_57U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="11-20",]) nrow_57U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="21-30",]) nrow_57U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="31-40",]) nrow_57U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="41-50",]) nrow_57U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="51-60",]) nrow_57U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="5-7" & sample_name$clip3_tail_len_range=="61+",]) nrow_8U_1_10 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="1-10",]) nrow_8U_11_20 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="11-20",]) nrow_8U_21_30 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="21-30",]) nrow_8U_31_40 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="31-40",]) nrow_8U_41_50 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="41-50",]) nrow_8U_51_60 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="51-60",]) nrow_8U_61 <- nrow(sample_name[sample_name$clip3_tail_U_len_range=="8+" & sample_name$clip3_tail_len_range=="61+",]) nrow_noTail <- nrow(sample_name[sample_name$clip3_tail==2000,]) nrow_xxx <- data.frame(reads=c(nrow_1U_1_10,nrow_2U_1_10,nrow_34U_1_10,nrow_57U_1_10,nrow_8U_1_10, nrow_1U_11_20,nrow_2U_11_20,nrow_34U_11_20,nrow_57U_11_20,nrow_8U_11_20, nrow_1U_21_30,nrow_2U_21_30,nrow_34U_21_30,nrow_57U_21_30,nrow_8U_21_30, nrow_1U_31_40,nrow_2U_31_40,nrow_34U_31_40,nrow_57U_31_40,nrow_8U_31_40, nrow_1U_41_50,nrow_2U_41_50,nrow_34U_41_50,nrow_57U_41_50,nrow_8U_41_50, nrow_1U_51_60,nrow_2U_51_60,nrow_34U_51_60,nrow_57U_51_60,nrow_8U_51_60, nrow_1U_61,nrow_2U_61,nrow_34U_61,nrow_57U_61,nrow_8U_61), U_range=c(rep(c("1","2","3-4","5-7","8+"),7)), range=(c(rep("1-10",5),rep("11-20",5),rep("21-30",5),rep("31-40",5),rep("41-50",5),rep("51-60",5),rep("61+",5))), sample=c(rep(column,35))) nrow_xxx = transform(nrow_xxx, percent = ave(reads, range, FUN = prop.table)) nrow_xxx$percent <- ifelse(is.na(nrow_xxx$percent),0,nrow_xxx$percent) return(nrow_xxx) }