--- title: "Integration with Pasca O4+ dataset" author: "Nina-Lydia Kazakou" date: "06/07/2021" output: html_document --- # Load libraries ```{r} library(GEOquery) library(Seurat) library(tidyverse) library(ggplot2) library(dplyr) library(ggsci) library(here) ``` # Set the colour pallete ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) ``` # Download data https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE115011 ```{r} getGEOSuppFiles("GSE115011") ``` # Create suerat.object ```{r} csvfile <- here("Materials", "GSE115011", "GSE115011_marton_all_cells.csv") #Error duplicate 'row.names' are not allowed #Save with no row.names if (exists("csvfile")) { raw_counts <- read.table(file = csvfile , header = TRUE, sep = ',', row.names = NULL) } if (exists("tsvfile")) { raw_counts <- read.table(file = tsvfile , header = TRUE, sep = '\t', row.names = NULL) } #Check the dimension and format dim(raw_counts) raw_counts[1:5, 1:3] # Which names are duplicated? name_duplicate <- raw_counts[which(duplicated(raw_counts$Gene.name)),1] length(name_duplicate) # number of gene name duplicates (52 in this case) # Which are exactly all the raw duplicated? row_duplicate <- raw_counts[which(duplicated(raw_counts)),1] length(row_duplicate) # number of exactly all raw duplicated (10) # You might want to save these name_duplicate and row_duplicate genes # Delete the ones that are exactly the same # and change name to the ones duplicated but with different values. raw_counts <- unique(raw_counts) dim(raw_counts) #Should have (10) less than before Gene_name <- make.unique(as.character(raw_counts$Gene.name)) # Reassign the rownames setted to NULL unil now to the new Gene names rownames(raw_counts) <- Gene_name raw_counts <- raw_counts[,-1] # get rid of old names exp_GSE115011 <- CreateSeuratObject(counts = raw_counts) exp_GSE115011 ``` # Find variable features ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} exp_GSE115011 <- FindVariableFeatures(exp_GSE115011, selection.method = "vst", nfeatures = 2000) # Identify the 10 most highly variable genes top10 <- head(VariableFeatures(exp_GSE115011), 10) # plot variable features with and without labels plot1 <- VariableFeaturePlot(exp_GSE115011, cols = mycoloursP[4:5]) plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) plot2 ``` # Scale all genes ```{r} all_genes <- rownames(exp_GSE115011) exp_GSE115011 <- ScaleData(exp_GSE115011, features = all_genes) ``` # Linear dimension reduction ```{r} exp_GSE115011 <- RunPCA(exp_GSE115011, features = VariableFeatures(object = exp_GSE115011), npcs = 100) ElbowPlot(exp_GSE115011, ndims = 100) exp_GSE115011 <- FindNeighbors(exp_GSE115011, dims = 1:80) exp_GSE115011 <- FindClusters(exp_GSE115011, resolution = 0.5) exp_GSE115011 <- RunUMAP(exp_GSE115011, dims = 1:80) exp_GSE115011 <- RunTSNE(exp_GSE115011, dims = 1:80, check_duplicates = FALSE) ``` ```{r fig.width=6, fig.height=5, fig.fullwidth=TRUE} # Plots DimPlot(exp_GSE115011, reduction = "umap", label = T, cols = mycoloursP[6:40], pt.size = 1) & NoAxes() DimPlot(exp_GSE115011, reduction = "tsne", label = T, cols = mycoloursP[6:40], pt.size = 1) & NoAxes() ``` ```{r fig.width=6, fig.height=15, fig.fullwidth=TRUE} FeaturePlot(exp_GSE115011, features = c("OLIG1", "OLIG2", "SOX10", "PDGFRA", "PCDH15", "PTPRZ1", "CSPG4", "GPR17", "MYRF", "MYC", "BMP4", "NKX2-2", "MAL", "MOG", "MBP", "PLP1", "CNP", "BCAN", "NEU4"), ncol = 2) & NoAxes() ``` ```{r fig.width=6, fig.height=8, fig.fullwidth=TRUE} FeaturePlot(exp_GSE115011, reduction = "umap", features = c("OPALIN", "KLK6", "RBFOX1", "SPARC", "SPARCL1", "DLL1", "EGFR", "GFAP"), ncol = 2) & NoAxes() ``` ```{r} saveRDS(exp_GSE115011, here("Materials", "GSE115011", file = "exp_GSE115011.rds")) ``` # Load objects object or take from the environment ```{r} exp_GSE115011 <- readRDS(here("Materials", "GSE115011", "exp_GSE115011.rds")) alt.co.oligos <- readRDS(here("data", "Oligos_only", "alt.co.oligos.rds")) head(alt.co.oligos@meta.data,3) # CleanUp the meta.data keep.metadata.co <- c("orig.ident", "Cells", "Sample", "Phase", "nFeature_RNA", "detected", "sum", "total", "sizeFactor", "scDblFinder.class", "percent.mito", "RNA_snn_res.1", "scSorter_predID", "Origins", "nCount_RNA", "log10GenesPerUMI", "Chemistry", "ManualClusters", "ClusterID") alt.co.oligos@meta.data <- alt.co.oligos@meta.data[, keep.metadata.co] head(alt.co.oligos@meta.data, 2) # Indicate that the counts and features are from the previous object with all the celltypes # Note: I want to keep columns present in both objects with the same names, i.e. "orig.ident", "nCount_RNA", "nFeature_RNA", "Cells", "percent.mito" alt.co.oligos$NK_detected <- alt.co.oligos$detected alt.co.oligos$NK_total <- alt.co.oligos$total alt.co.oligos$NK_Sample <- alt.co.oligos$Sample alt.co.oligos$NK_Phase <- alt.co.oligos$Phase alt.co.oligos$NK_sum <- alt.co.oligos$sum alt.co.oligos$NK_sizeFactor <- alt.co.oligos$sizeFactor alt.co.oligos$NK_scDblFinder.class <- alt.co.oligos$scDblFinder.class alt.co.oligos$NK_RNA_snn_res.1 <- alt.co.oligos$RNA_snn_res.1 alt.co.oligos$NK_scSorter_predID <- alt.co.oligos$scSorter_predID alt.co.oligos$NK_OriginPopulation <- alt.co.oligos$Origins alt.co.oligos$NK_log10GenesPerUMI <- alt.co.oligos$log10GenesPerUMI alt.co.oligos$NK_Chemistry <- alt.co.oligos$Chemistry alt.co.oligos$NK_ManualClusters <- alt.co.oligos$ManualClusters alt.co.oligos$NK_ClusterID <- alt.co.oligos$ClusterID alt.co.oligos$detected <- NULL alt.co.oligos$total <- NULL alt.co.oligos$Sample <- NULL alt.co.oligos$Phase <- NULL alt.co.oligos$sum <- NULL alt.co.oligos$sizeFactor <- NULL alt.co.oligos$scDblFinder.class <- NULL alt.co.oligos$RNA_snn_res.1 <- NULL alt.co.oligos$scSorter_predID <- NULL alt.co.oligos$Origins <- NULL alt.co.oligos$log10GenesPerUMI <- NULL alt.co.oligos$Chemistry <- NULL alt.co.oligos$ManualClusters <- NULL alt.co.oligos$ClusterID <- NULL head(alt.co.oligos) keep.metadata.exp_GSE115011 <- c("orig.ident", "nFeature_RNA", "nCount_RNA") exp_GSE115011@meta.data <- exp_GSE115011@meta.data[, keep.metadata.exp_GSE115011] # Calculate the percentage of mitochondrial genes exp_GSE115011[["percent.mito"]] <- PercentageFeatureSet(exp_GSE115011, pattern = "^MT-") # Label the cells origin exp_GSE115011@meta.data$Cells <- "exp_GSE115011" head(exp_GSE115011) ``` # Add a dataset column ```{r} exp_GSE115011@meta.data$dataset <- "Marton et al., 2019" alt.co.oligos@meta.data$dataset <- "Kazakou" ``` # Integrate datasets ```{r} # Create a list of objects srt_list <- list(alt.co.oligos, exp_GSE115011) # normalize and identify variable features for each dataset independently srt_list <- lapply(X = srt_list, FUN = function(x) { x <- NormalizeData(x) x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) }) # select features that are repeatedly variable across datasets for integration var_features <- SelectIntegrationFeatures(object.list = srt_list) # create anchors to integrate the datasets anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features, k.anchor = 5, k.filter = NA) #for small datasets # create an 'integrated' data assay ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20) ``` # Run the standard workflow for visualisation & clustering ```{r} DefaultAssay(ol_comb) <- "integrated" Idents(ol_comb) <- "dataset" # Scale data all_genes <- rownames(ol_comb) ol_comb <- ScaleData(ol_comb, features = all_genes) # Dimentionality reduction ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE) # Choose the mumber of PCs ElbowPlot(ol_comb, ndims = 50) #20 # Clustering ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindClusters(ol_comb, resolution = c(0.05, 0.1, 0.3, 0.5, 0.7, 0.8, 1.0)) colnames(ol_comb@meta.data) ``` # Visualization ```{r} DimPlot(ol_comb, pt.size = 1, cols = mycoloursP[5:40]) & NoAxes() DimPlot(ol_comb, pt.size = 1, group.by = "dataset", cols = mycoloursP[5:40]) & NoAxes() DimPlot(ol_comb, group.by = "dataset", split.by = "dataset", pt.size = 1, cols = mycoloursP[5:40]) & NoAxes() DimPlot(ol_comb, group.by = "NK_RNA_snn_res.1", pt.size = 1, cols = mycoloursP[15:40], label = TRUE) & NoAxes() & NoLegend() DimPlot(ol_comb, group.by = "NK_RNA_snn_res.1", split.by = "dataset", pt.size = 1, cols = mycoloursP[15:40], label = TRUE) & NoAxes() & NoLegend() ``` ```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE} DefaultAssay(ol_comb) <- "RNA" FeaturePlot(ol_comb, reduction = "umap", pt.size = 1, features = c("PDGFRA", "MBP", "PLP1", "MAG", "MOG", "MYRF", "GPR17", "EGFR", "SPARC"), ncol = 2) & NoAxes() ``` ```{r} saveRDS(ol_comb, here("Materials", "GSE115011", file = "ol_comb_alt.co.oligos_pasca.rds")) ``` # Analyse integrated datasets ```{r} # Generate DimPlots for all the different resolutions dir.create(here("Materials", "GSE115011", "Integration_Analysis", "DimPlots_allRes")) plot_list_func <- function(seur_obj, col_pattern, plot_cols, clust_lab = TRUE, label_size = 8, num_col = 4, save_dir = getwd(), width=7, height=5){ extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data)) res_names <- names(seur_obj@meta.data[extr_res_col]) # gtools function, sorts gene_names alphanumeric: res_names <- mixedsort(res_names) plot_l <-list() for(i in 1: length(res_names)){ pdf(paste0(save_dir, res_names[i], "_umap.pdf"), width=width, height=height) dim_plot <- DimPlot(seur_obj, reduction = "umap", cols= plot_cols, group.by = res_names[i], label = clust_lab, label.size = label_size) + NoLegend() print(dim_plot) dev.off() print(dim_plot) } } plot_list_func(seur_obj = ol_comb, col_pattern = "integrated_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("Materials", "GSE115011", "Integration_Analysis", "DimPlots_allRes")) ``` ## Run approximation models # Load libraries ```{r} library(SingleCellExperiment) library(devtools) library(Matrix) library(scales) library(clustree) library(gtools) library(cluster) library(bluster) ``` ```{r fig.width=9, fig.height=10, fig.fullwidth=TRUE} # Plot a ClusterTree dir.create(here("Materials", "GSE115011", "Integration_Analysis", "ClusterTree")) #For ploting all the resolutions clust_tree <- clustree(ol_comb, prefix = "integrated_snn_res.", edge_arrow=FALSE) # Maybe res.0.6? print(clust_tree) ``` Notes: In the lowest resolution (0.0.5), Cluster 0 seems to be progenitor cells, while cluster 1 corresponds to mature OL. ```{r} Idents(ol_comb) <- "integrated_snn_res.0.05" DefaultAssay(ol_comb) <- "RNA" DimPlot(ol_comb) FeaturePlot(ol_comb, features = c("OLIG2", "SOX10", "PDGFRA", "MBP"), label = T) ``` ```{r fig.width=10, fig.height=8, fig.fullwidth=TRUE} # Approximate Silhouette Plots dir.create(here("Materials", "GSE115011", "Integration_Analysis", "Silhouette_Plots")) dir.create(here("Materials", "GSE115011", "Integration_Analysis", "ClusterPurity_Plots")) ol_comb_sce <- as.SingleCellExperiment(ol_comb) #1.Approximate Silhouette approx_sil <- function(sce_obj, reduction = "PCA", col_pattern = "integrated_snn_res.", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5){ res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(ol_comb_sce)) for(i in 1: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction), clusters = clust_int) sil_data <- as.data.frame(sil_approx) sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other)) sil_data$cluster <- factor(clust_int) apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(apr_sil_plot) dev.off() print(apr_sil_plot) } print("Done") } approx_sil(sce_obj = ol_comb_sce, col_pattern = "integrated_snn_res.", plot_cols = mycoloursP[6:40], save_dir =here("Materials", "GSE115011", "Integration_Analysis", "Silhouette_Plots")) #2.Cluster Purity clu_pure <- function(sce_obj, reduction = "PCA", col_pattern = "integrated_snn_res.", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5){ res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(ol_comb_sce)) for(i in 1: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int) pure_data <- as.data.frame(pure) pure_data$maximum <- factor(pure_data$maximum) pure_data$cluster <- factor(clust_int) pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(pure_plot) dev.off() print(pure_plot) } print("Done") } clu_pure(sce_obj = ol_comb_sce, col_pattern = "integrated_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("Materials", "GSE115011", "Integration_Analysis", "ClusterPurity_Plots")) ``` Notes: The OL population in the ClusterTree seems to be stable in one cluster until resolution 0.7. After that a new mix of progenitors and OLs takes place. Based on the Purity plots, maybe resolution 0.5 would be better though. # Cell Cycle Annotation ```{r} # A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes ol_comb <- CellCycleScoring(ol_comb, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) head(ol_comb@meta.data) DimPlot(ol_comb, group.by = "Phase", cols = mycoloursP, label = T) DimPlot(ol_comb, split.by = "Phase", group.by = "dataset", cols = mycoloursP) ``` # Cluster Annotation for resultion 0.7 ```{r} Idents(ol_comb) <- "integrated_snn_res.0.7" DimPlot(ol_comb, cols = mycoloursP, label = TRUE, pt.size = 1) & NoLegend() DimPlot(ol_comb, cols = mycoloursP[10:40], group.by = "dataset") ``` ```{r fig.width=12, fig.height=5, fig.fullwidth=TRUE} DimPlot(ol_comb, cols = mycoloursP, split.by = "NK_Sample", group.by = "integrated_snn_res.0.7", label = TRUE, pt.size = 1) & NoLegend() ``` ```{r} # Check that all samples contribute to the formed clusters IDsPerCluster_ol_comb_0.7 <- as.data.frame(tapply(ol_comb@meta.data$dataset, ol_comb@meta.data$integrated_snn_res.0.7, function(x) length(unique(x)) )) names(IDsPerCluster_ol_comb_0.7) <- "NumberOfIDs_ol_comb_0.7" IDsPerCluster_ol_comb_0.7$NumberOfIDs_ol_comb_0.7 <- rownames(IDsPerCluster_ol_comb_0.7) IDsPerCluster_ol_comb_0.7$Cluster <- rownames(IDsPerCluster_ol_comb_0.7) IDsPerCluster_ol_comb_0.7 # Identify the number of cells per cluster n_cells_ol_comb_0.7 <- FetchData(ol_comb, vars = c("integrated_snn_res.0.7", "dataset")) %>% dplyr::count(integrated_snn_res.0.7, dataset) %>% tidyr::spread(integrated_snn_res.0.7, n) n_cells_ol_comb_0.7 dir.create(here("Materials", "GSE115011", "Integration_Analysis", "Integrated_Res.0.7")) write.csv(n_cells_ol_comb_0.7, here("Materials", "GSE115011", "Integration_Analysis", "Integrated_Res.0.7", file = "no.ofCellsperCluster_integrated_snn_res.0.7.csv")) ``` ```{r} DefaultAssay(ol_comb) <- "RNA" ``` ```{r fig.width=7, fig.height=6, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("OLIG1", "OLIG2", "SOX10", "PPP1R16B"), cols = mycoloursP, pt.size = 0.2, ncol = 2) # Clusters 1 & 7 are the OL clusters # Clusters 2 & 5 seem to be really early? ``` ```{r fig.width=7, fig.height=6, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("PDGFRA", "PCDH15", "SOX6"), cols = mycoloursP, pt.size = 0.2, ncol = 2) # Clusters 0, 3, 4, & maybe 6 are progenitor cells # Could Cluster 2 be COPs? and maybe 6 too? ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("GPR17", "MYC", "NKX2-2", "MYRF", "CLDN11"), cols = mycoloursP, pt.size = 0.2, ncol = 2) # Clusters 0 & 6 seem to be something like COPs ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("MBP", "CNP", "PLP1", "MOG", "MAG", "OSP", "MPZ"), cols = mycoloursP, pt.size = 0.2, ncol = 2) # Clusters 4 & 6 are really MBP positive ``` ```{r fig.width=7, fig.height=6, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("DLL1", "DLL3", "EGFR"), cols = mycoloursP, pt.size = 0.2, ncol = 2) ``` ```{r fig.width=7, fig.height=6, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("HOPX", "SPARCL1", "SPARC"), pt.size = 0.2, ncol = 2, cols = mycoloursP) ``` ```{r fig.width=7, fig.height=3, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("GFAP", "SOX9"), cols = mycoloursP, pt.size = 0.2) ``` ```{r fig.width=7, fig.height=3, fig.fullwidth=TRUE} VlnPlot(ol_comb, features = c("MKI67", "TOP2A"), cols = mycoloursP, pt.size = 0.2) ``` ```{r fig.width=7, fig.height=12, fig.fullwidth=TRUE} FeaturePlot(ol_comb, features = c("OPALIN", "RBFOX1", "SPARC", "NELL1", "PAX3", "FMN1", "LAMA2", "SLC22A3"), pt.size = 0.7, ncol = 2, label = TRUE) VlnPlot(ol_comb, features = c("OPALIN", "RBFOX1", "SPARC", "NELL1", "PAX3", "FMN1", "LAMA2", "SLC22A3"), pt.size = 0.2, cols = mycoloursP, ncol = 2) ``` ```{r fig.width=15, fig.height=8, fig.fullwidth=TRUE} DotPlot(ol_comb, features = c("OLIG1", "OLIG2", "SOX10", "PPP1R16B", "PDGFRA", "PCDH15", "SOX6", "GPR17", "MYC", "NKX2-2", "MYRF", "CLDN11", "MBP", "CNP", "PLP1", "MOG", "MAG", "MPZ", "DLL1", "DLL3", "EGFR","HOPX", "SPARCL1", "SPARC", "GFAP", "SOX9", "OPALIN", "RBFOX1", "NELL1", "PAX3", "FMN1", "LAMA2", "SLC22A3"), cols = mycoloursP) + FontSize(7) ``` Notes: Cluster 7: Maturest OL (FMN1+), Cluster 1: Mature OL , Cluster 2: Pri-OPCs, Cluster 5: OAPC, Cluster 0: Typical OPCs, Cluster 4:?, Cluster 6?: # DE analysis ```{r} # All resolutions int_res_all_mark <- function(seur_obj, int_cols, only_pos = TRUE, min_pct = 0.25, logfc_threshold = 0.25, fil_pct_1 = 0.25, fil_pct_2 = 0.6, avg_log = 1.2, save_dir = getwd(), test_use = "MAST" ){ for(i in 1:length(int_cols)){ Idents(seur_obj) <- int_cols[i] all_mark <- FindAllMarkers(seur_obj, only.pos = only_pos, min.pct = min_pct, logfc.threshold = logfc_threshold, test.use = test_use) fil_mark<- subset(all_mark, all_mark$pct.1 > fil_pct_1 & all_mark$pct.2 < fil_pct_2 ) write.csv(all_mark, paste(save_dir, "/all_mark", int_cols[i], ".csv", sep = "" )) write.csv(fil_mark, paste(save_dir, "/fil_mark", int_cols[i], ".csv", sep = "" )) } } dir.create(here("Materials", "GSE115011", "Integration_Analysis", "DE_Analysis")) int_res_all_mark(ol_comb, int_cols = c("integrated_snn_res.0.05", "integrated_snn_res.0.1", "integrated_snn_res.0.3", "integrated_snn_res.0.5", "integrated_snn_res.0.7", "integrated_snn_res.0.8", "integrated_snn_res.1.0"), save_dir = here("Materials", "GSE115011", "Integration_Analysis", "DE_Analysis")) ``` ```{r} fil.de.markers.integrated_snn_res.0.7 <- read.csv(here("Materials", "GSE115011", "Integration_Analysis", "DE_Analysis", "fil_markintegrated_snn_res.0.7.csv")) head(fil.de.markers.integrated_snn_res.0.7) # Identify the top10 genes top10_integrated_snn_res.0.7 <- fil.de.markers.integrated_snn_res.0.7 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes top10_integrated_snn_res.0.7 # Calculate the cluster averages to plot those as well cluster_averages_integrated_snn_res.0.7 <- AverageExpression(ol_comb, group.by = "integrated_snn_res.0.7", return.seurat = TRUE) cluster_averages_integrated_snn_res.0.7@meta.data$cluster <- factor(levels(as.factor(ol_comb@meta.data$integrated_snn_res.0.7)), levels = c(0, 1, 2, 3, 4, 5, 6, 7)) ``` ```{r fig.width=7, fig.height=12, fig.fullwidth=TRUE} DefaultAssay(ol_comb) <- "integrated" DoHeatmap(ol_comb, features = top10_integrated_snn_res.0.7$gene, label = TRUE, group.by = "integrated_snn_res.0.7", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3) DoHeatmap(object = cluster_averages_integrated_snn_res.0.7, features = top10_integrated_snn_res.0.7$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F) ``` # Cluster Annotation ```{r} ol_comb@meta.data$ClusterID <- ol_comb@meta.data$integrated_snn_res.0.7 colnames(ol_comb@meta.data) current.ids <- c("0", "1", "2", "3", "4", "5", "6", "7") new.ids <- c("OPC", "mOL1", "pri-OPC", "Cyc", "imOL", "OAPC", "other OPC", "mOL2") ol_comb@meta.data$ClusterID <- plyr::mapvalues(x = ol_comb@meta.data$ClusterID, from = current.ids, to = new.ids) head(ol_comb@meta.data) ``` ```{r} saveRDS(ol_comb, here("Materials", "GSE115011", file = "ol_comb_alt.co.oligos_pasca.rds")) ``` # Plots ```{r} Idents(ol_comb) <- "ClusterID" DimPlot(ol_comb, split.by = "dataset", cols = mycoloursP[12:40], pt.size = 1, label = FALSE) & NoAxes() ``` ```{r fig.width=7, fig.height=12, fig.fullwidth=TRUE} DefaultAssay(ol_comb) <- "integrated" cluster_averages_ClusterID <- AverageExpression(ol_comb, group.by = "ClusterID", return.seurat = TRUE) cluster_averages_ClusterID@meta.data$HM <- factor(levels(as.factor(ol_comb@meta.data$ClusterID)), levels = c("OPC", "mOL1", "pri-OPC", "Cyc", "imOL", "OPAC", "other OPC", "mOL2")) DoHeatmap(object = cluster_averages_ClusterID, features = top10_integrated_snn_res.0.7$gene, label = TRUE, group.by = "HM", group.colors = mycoloursP[6:40], draw.lines = F) saveRDS(cluster_averages_ClusterID, here("Materials", "GSE115011", file = "cluster_averages_ClusterID.rds")) ``` ```{r} sessionInfo() ```