--- title: "DE analysis_Oligos_forBestRes" author: "Nina-Lydia Kazakou" date: "27/06/2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) ``` # Set the colour pallete ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` ```{r} co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds")) dim(co.oligos) # 22735 301 head(co.oligos@meta.data) ``` ```{r} DefaultAssay(co.oligos) <- "RNA" ``` ```{r} # General Plots DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[5:7], group.by = "Phase") & NoAxes() DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[12:16], group.by = "Sample") & NoAxes() DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[21:22], group.by = "scDblFinder.class") & NoAxes() ``` # Identification of marker genes ```{r} int_res_all_mark <- function(seur_obj, int_cols, only_pos = TRUE, min_pct = 0.25, logfc_threshold = 0.25, fil_pct_1 = 0.25, fil_pct_2 = 0.6, avg_log = 1.2, save_dir = getwd(), test_use = "MAST" ){ for(i in 1:length(int_cols)){ Idents(seur_obj) <- int_cols[i] all_mark <- FindAllMarkers(seur_obj, only.pos = only_pos, min.pct = min_pct, logfc.threshold = logfc_threshold, test.use = test_use) fil_mark<- subset(all_mark, all_mark$pct.1 > fil_pct_1 & all_mark$pct.2 < fil_pct_2 ) write.csv(all_mark, paste(save_dir, "/all_mark", int_cols[i], ".csv", sep = "" )) write.csv(fil_mark, paste(save_dir, "/fil_mark", int_cols[i], ".csv", sep = "" )) } } dir.create(here("outs", "Oligos_only", "MarkerGenes")) int_res_all_mark(co.oligos, int_cols = c("RNA_snn_res.0.05", "RNA_snn_res.0.1", "RNA_snn_res.0.2", "RNA_snn_res.0.3", "RNA_snn_res.0.6", "RNA_snn_res.0.7", "RNA_snn_res.0.9", "RNA_snn_res.1"), save_dir = here("outs", "Oligos_only", "MarkerGenes")) ``` Resolutions 0.5 & 0.6 look very similar; a single cell is allocated within a different cluster and that is all the difference. The same happens between resolution 0.7 & 0.9, so I decided to use the later for comparisons in both cases. Here, I will plot the filtered genes for both resolutions to see which one is more suitable. ```{r} Idents(co.oligos) <- "RNA_snn_res.0.6" fil.de.markers.0.6 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.6.csv")) head(fil.de.markers.0.6) # Identify the top10 genes top10_res.0.6 <- fil.de.markers.0.6 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes top10_res.0.6 # Calculate the cluster averages to plot those as well cluster_averages_res.0.6 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.6", return.seurat = TRUE) cluster_averages_res.0.6@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.6)), levels = c(0, 1, 2, 3)) ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} DoHeatmap(co.oligos, features = top10_res.0.6$gene, label = TRUE, group.by = "RNA_snn_res.0.6", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3) DoHeatmap(object = cluster_averages_res.0.6, features = top10_res.0.6$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) ``` ```{r} Idents(co.oligos) <- "RNA_snn_res.0.9" fil.de.markers.0.9 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.9.csv")) head(fil.de.markers.0.9) # Identify the top10 genes top10_res.0.9 <- fil.de.markers.0.9 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes top10_res.0.9 # Calculate the cluster averages to plot those as well cluster_averages_res.0.9 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.9", return.seurat = TRUE) cluster_averages_res.0.9@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.9)), levels = c(0, 1, 2, 3, 4)) ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} DoHeatmap(co.oligos, features = top10_res.0.9$gene, label = TRUE, group.by = "RNA_snn_res.0.9", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3) DoHeatmap(object = cluster_averages_res.0.9, features = top10_res.0.9$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) ``` Maybe I should also look at resolution 1 ```{r} Idents(co.oligos) <- "RNA_snn_res.1" fil.de.markers.1 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.1.csv")) head(fil.de.markers.1) # Identify the top10 genes top10_res.1 <- fil.de.markers.1 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes top10_res.1 # Calculate the cluster averages to plot those as well cluster_averages_res.1 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.1", return.seurat = TRUE) cluster_averages_res.1@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.1)), levels = c(0, 1, 2, 3, 4, 5)) ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} DoHeatmap(co.oligos, features = top10_res.1$gene, label = TRUE, group.by = "RNA_snn_res.1", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3) DoHeatmap(object = cluster_averages_res.1, features = top10_res.1$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) ``` ```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE} # Oligolineage FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10", "PPP1R16B"), ncol = 2, pt.size = 1) & NoAxes() ``` ```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE} # OPCs FeaturePlot(co.oligos, features = c("PDGFRA", "CSPG4", "GPR17", "PTPRZ1", "OLIG1", "OLIG2", "PCDH15", "PTGDS", "BCAN", "CABLES1", "GFRA1", "LINC01965"), ncol = 2, pt.size = 1) & NoAxes() ``` ```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE} # COPs FeaturePlot(co.oligos, features = c("ETV1", "CHST9", "MYT1", "TENM2", "CAMK2A", "KCNQ5", "SEMA5B", "SYT1", "GPR17", "BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes() ``` ```{r fig.width=7, fig.height=10, fig.fullwidth=TRUE} # Oligodendrocytes FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG", "MOBP", "MBP", "SOX10" ), pt.size = 1, ncol = 2) & NoAxes() ``` ```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE} # More Oligodendrocytes FeaturePlot(co.oligos, features = c("SNAP25", "CNTN1", "FRY", "PLXDC2", "PTPRM", "FOS","VIM", "JUNB", "AFF3", "FSTL5", "SGCZ", "MDGA2" ), pt.size = 1, ncol = 2) & NoAxes() ``` Some markers from the monolayer dataset that helped me identify oligo clusters there ```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE} # OAPCs FeaturePlot(co.oligos, features = c("HOPX", "SPARCL1", "SPARC", "GFAP"), ncol = 2, pt.size = 1) & NoAxes() ``` ```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE} # Primitive OPCs FeaturePlot(co.oligos, features = c("PPP1R14B", "DLL1", "DLL3", "EGFR"), ncol = 2, pt.size = 1) & NoAxes() ``` ```{r} # COPs FeaturePlot(co.oligos, features = c("MYRF", "NKX2-2", "MYC"), ncol = 2, pt.size = 1) & NoAxes() ``` ```{r} sessionInfo() ```