--- title: "Alternative Clustering for oligo subset" author: "Nina-Lydia Kazakou" date: "08/07/2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) ``` # Set the colour pallete ```{r include=FALSE} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load Object ```{r} alt.co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds")) ``` # Add the clustering resolution of interest to the object ```{r} alt.co.oligos <- FindNeighbors(alt.co.oligos, dims = 1:14) alt.co.oligos <- FindClusters(alt.co.oligos, resolution = 2) #Do not re-run the UMAP ``` ```{r} DimPlot(alt.co.oligos, group.by = "RNA_snn_res.2", cols = mycoloursP, label = TRUE, pt.size = 1) & NoAxes() ``` What I would like to do here is keep the structure of "RNA_snn_res.1" clusters for the respective Cluster0, Cluster3 & Cluster4, but incorporate the "RNA_snn_res.2" clustering for the respective Cluster 1, Clusetr 2 & Cluster 5 (of "RNA_snn_res.1"). In other words I would like these three clusters into 4, while keeping the rest of the clustering. # Alternative Clustering ```{r} alt.co.oligos@meta.data$ManualClusters <- ifelse(alt.co.oligos@meta.data$RNA_snn_res.2 == 5, "2_A", ifelse(alt.co.oligos@meta.data$RNA_snn_res.2 == 7, "2_B", paste(alt.co.oligos@meta.data$RNA_snn_res.1))) alt.co.oligos@meta.data$ManualClusters <- ifelse(alt.co.oligos@meta.data$ManualClusters == 2, "2_B", paste(alt.co.oligos@meta.data$ManualClusters)) ``` ```{r} Idents(alt.co.oligos) <- "ManualClusters" DimPlot(alt.co.oligos, cols = mycoloursP[15:40], label = TRUE, pt.size = 1) & NoAxes() DimPlot(alt.co.oligos, group.by = "Sample",cols = mycoloursP[20:40], label = FALSE, pt.size = 1) & NoAxes() ``` ```{r fig.width=12, fig.height=6, fig.fullwidth=TRUE} DimPlot(alt.co.oligos, split.by = "Sample", cols = mycoloursP[15:40], label = FALSE, pt.size = 1.5) & NoAxes() ``` ```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE} VlnPlot(alt.co.oligos, features = c("PDGFRA", "PCDH15", "MBP", "CNP", "NKX2-2", "MYC", "SYT1"), cols = mycoloursP, ncol = 2) ``` ```{r fig.width=12, fig.height=8, fig.fullwidth=TRUE} DotPlot(alt.co.oligos, features = c("OLIG2", "SOX10", "SPARC", "HOPX", "GFAP", "EGFR", "DLL1", "DLL3", "PDGFRA", "PCDH15", "SOX6", "SYT1", "MYC", "NKX2-2", "GPR17", "MYRF", "MBP", "PLP1", "CNP", "MAG", "MOG", "MOBP"), cols = mycoloursP[5:6]) + FontSize(7) ``` ```{r} saveRDS(alt.co.oligos, here("data", "Oligos_only", file = "alt.co.oligos.rds")) ``` # Run DE analysis with the new clustering ```{r} Idents(alt.co.oligos) <- "ManualClusters" de.markers.alt.co.oligos <- FindAllMarkers(alt.co.oligos, min.pct = 0.25, logfc.threshold = 0.25,term.use = "MAST", only.pos = TRUE ) head(de.markers.alt.co.oligos) write.csv(de.markers.alt.co.oligos, here("outs", "Oligos_only", file = "de.markers.alt.co.oligos.csv")) #Filter them pct.1_fil = 0.25 pct.2_fil = 0.5 de.markers.alt.co.oligos.fil <- subset(de.markers.alt.co.oligos, de.markers.alt.co.oligos$pct.1 > pct.1_fil & de.markers.alt.co.oligos$pct.2 < pct.2_fil) head(de.markers.alt.co.oligos) write.csv(de.markers.alt.co.oligos.fil, here("outs", "Oligos_only",file = "de.markers.alt.co.oligos.fil.csv")) ``` # Plot a HP of the top10_genes between Clusters 2A & 2B ```{r} #Filter for the filtered markers of the clusters of interest int_de.markers.alt.co.oligos.fil <- de.markers.alt.co.oligos.fil %>% filter(cluster == c("2_A", "2_B")) head(int_de.markers.alt.co.oligos.fil) write.csv(int_de.markers.alt.co.oligos.fil, here("outs", "Oligos_only", file = "int_de.markers.alt.co.oligos.fil.csv")) #Identify top10 genes int_top10 <- int_de.markers.alt.co.oligos.fil %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) levels(alt.co.oligos) int_clust <- subset(alt.co.oligos, ident = c("2_A", "2_B")) ``` ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} DoHeatmap(int_clust, features = int_top10$gene, label = TRUE, group.by = "ManualClusters", group.colors = mycoloursP[6:40], draw.lines = FALSE) ``` # Plot a HP for the top10_genes ```{r fig.width=13, fig.height=18, fig.fullwidth=TRUE} # Do a heatmap from the cluster averages alt.co.oligos@meta.data$ClusterID <- alt.co.oligos@meta.data$ManualClusters current.ids <- c("0", "1", "2_A", "2_B", "3", "4", "5") new.ids <- c("Primitive OPC", "COP", "Immature OL", "OPC", "OAPC", "OL", "Other OPC") alt.co.oligos@meta.data$ClusterID <- plyr::mapvalues(x = alt.co.oligos@meta.data$ClusterID, from = current.ids, to = new.ids) #add a column to the meta.data to match the amount of clusters, after the manual clustering head(alt.co.oligos@meta.data) Idents(alt.co.oligos) <- "ClusterID" # DE Genes calculated after the mannual clustering filtered top10_fil <- de.markers.alt.co.oligos.fil %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes # DE Genes calculated after the mannual clustering unfiltered top10_unfil <- de.markers.alt.co.oligos %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes cluster_averages <- AverageExpression(alt.co.oligos, group.by = "ClusterID", return.seurat = TRUE) cluster_averages@meta.data$cluster <- factor(levels(as.factor(alt.co.oligos@meta.data$ClusterID)), levels = c("Primitive OPC", "COP", "Immature OL", "OPC", "OAPC", "OL", "Other OPC")) DoHeatmap(object = cluster_averages, features = top10_fil$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) + ggtitle("Top10 Filtered DE Genes") + theme(plot.title = element_text(size = 30)) DoHeatmap(object = cluster_averages, features = top10_unfil$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) + ggtitle("Top10 DE Genes") + theme(plot.title = element_text(size = 30)) ``` ! New problem: it seems that in this dataset the OPCs are SPARCL1+ & I have a SPARC+ population that I characterize as OAPC. In the monolayer dataset, I have characterised the SPARCL1 poulation as OAPC. # Pairwise comparison of neighbouring clusters ```{r} Idents(alt.co.oligos) <- "ClusterID" clust_id_list_MC <- list(list("Primitive OPC", "COP"), list("COP", "Immature OL"), list("COP", "OPC"), list("Immature OL", "OPC"), list("Immature OL", "OL"), list("OPC", "Other OPC")) ``` ```{r} pairwise_mark <- function(seur_obj, int_cols, clust_id_list, only_pos = TRUE, min_pct = 0.25, logfc_threshold = 0.25, fil_pct_1 = 0.25, fil_pct_2 = 0.1, save_dir = getwd(), test_use = "MAST"){ for(k in 1:length(int_cols)){ Idents(seur_obj) <- int_cols[k] for(i in 1: length(clust_id_list)){ clust_mark <- FindMarkers(seur_obj, ident.1 = clust_id_list[[i]][[1]], ident.2 = clust_id_list[[i]][[2]], min.pct = min_pct, test.use = test_use) clust_mark$cluster <- clust_id_list[[i]][[1]] clust_mark$comp_to_clust <- clust_id_list[[i]][[2]] write.csv(clust_mark, paste(save_dir, "/", int_cols[k], "_", clust_id_list[[i]][[1]], "_", clust_id_list[[i]][[2]], ".csv", sep = "" )) } } } dir.create(here("outs", "Oligos_only", "MarkerGenes_AlternativeClustering", "Pairwise")) pairwise_mark(alt.co.oligos, int_cols = "ClusterID", save_dir = here("outs", "Oligos_only", "MarkerGenes_AlternativeClustering", "Pairwise"), clust_id_list = clust_id_list_MC) ``` # Preapre data for plotting differentially expressed genes ```{r} gen_mark_list <-function(file_dir = getwd(), avg_log = 1.2, pct_1 = 0.25, pct_2 = 0.6, pairwise = FALSE ){ temp = list.files(path = file_dir, pattern="*.csv") myfiles = lapply(paste(file_dir, temp, sep = "/"), read.csv) for(i in 1:length(myfiles)){ dat <- myfiles[[i]] av_log_fil <- subset(dat, dat$avg_log2FC > avg_log & dat$pct.1 > pct_1 & dat$pct.2 < pct_2) if(pairwise == TRUE){ top10 <- av_log_fil %>% top_n(10, avg_log2FC) top10$gene <- top10$X }else{ av_log_fil$cluster <- as.character(av_log_fil$cluster) top10 <- av_log_fil %>% group_by(cluster) %>% top_n(10, avg_log2FC) } if(i ==1){ fil_genes <- top10 }else{ fil_genes <- rbind(fil_genes, top10) } fil_genes <- fil_genes[!duplicated(fil_genes$gene),] } return(fil_genes) } fil_genes <- gen_mark_list(here("outs", "Oligos_only", "MarkerGenes_AlternativeClustering", "AllMarkers")) fil_genes_pw <- gen_mark_list(here("outs", "Oligos_only", "MarkerGenes_AlternativeClustering", "Pairwise"), pairwise = TRUE) int_genes <- c(fil_genes$gene, fil_genes_pw$gene) int_genes <- unique(int_genes) ``` ```{r fig.width=13, fig.height=18, fig.fullwidth=TRUE} DoHeatmap(object = cluster_averages, features = int_genes, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) + ggtitle("AllMarkers & Pairwise: Top10 Filtered DE Genes") + theme(plot.title = element_text(size = 30)) ``` ```{r} sessionInfo() ```