--- title: "Int_RC17_ol_PrimaryCulturedMouseOligodendroglia" author: "Nina-Lydia Kazakou" date: "24/04/2021" output: html_document --- This original script for integrating RC17_ol with primary cultured rodent oligodendroglia is edited on the 05/04/2022. The edits include mostly changing the patways to the original folders containing the data in order to produce a final report and include this analysis into the total monolayer oligodendroglia final analysis. # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(Seurat) library(here) library(ggsci) library(ggplot2) library(dplyr) library(biomaRt) library(SingleCellExperiment) library(scater) library(RColorBrewer) library(corrplot) ``` ### Load colour palets ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` For integrating the int_mus with the human dataset, gene names have to be the same. For converting int_mus nomenclature to human gene names, I use biomaRt and filter out genes or which no human nomenclature exists. # Convert mouse gene names to human genes names ### Use library "biomaRT" ```{r eval=FALSE} dir.create(here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia")) dir.create(here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "Rodent_CellRangerOutputs")) ``` ```{r eval=FALSE} listMarts() ensembl <- useMart("ensembl") ensembl_human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") ensembl_mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl") # Load raw_counts or take from environment raw_counts <- Read10X(data.dir = here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "Rodent_CellRangerOutputs")) mouse_genes <- rownames(raw_counts) matched_genes <- getLDS(attributes = c("mgi_symbol", "chromosome_name"), filters = "mgi_symbol", values = mouse_genes, mart = ensembl_mouse, attributesL = c("hgnc_symbol", "chromosome_name", "start_position", "end_position") , martL = ensembl_human) matched_genes <- subset(matched_genes, matched_genes$HGNC.symbol != "") subset_boul <- rownames(raw_counts) %in% matched_genes$MGI.symbol raw_counts <- raw_counts[subset_boul,] subs_boul_2 <- matched_genes$MGI.symbol %in% rownames(raw_counts) # match the order of matched_genes$MGI.symbol wieh rownames(raw_counts) matched_genes <- matched_genes[match(rownames(raw_counts), matched_genes$MGI.symbol),] rownames(raw_counts) <- make.unique(rownames(raw_counts)) matched_genes$MGI.symbol <- make.unique(matched_genes$MGI.symbol) matched_genes$HGNC.symbol <- make.unique(matched_genes$HGNC.symbol) summary(rownames(raw_counts) == matched_genes$MGI.symbol) rownames(raw_counts) <- matched_genes$HGNC.symbol int_mus <- CreateSeuratObject(counts = raw_counts) ``` ```{r eval=FALSE} # check how mt genes are called in the int_mus genome int_mus[["percent.mito"]] <- PercentageFeatureSet(int_mus, pattern = "^MT-") head(int_mus@meta.data) VlnPlot(int_mus, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3) FeatureScatter(int_mus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") ``` # Re-apply previous analysis for mouse_oligodendroglia ### QC ```{r eval=FALSE, sce} # Create SingleCellExperiment object mouse_sce <- as.SingleCellExperiment(int_mus) ``` ### FeatureQC ```{r eval=FALSE, featureQC} keep_feature <- rowSums(counts(mouse_sce) > 0) > 1 mouse_sce <- mouse_sce[keep_feature, ] nrow(mouse_sce) #17762 mouse_sce <- addPerFeatureQC(mouse_sce) colnames(rowData(mouse_sce)) ``` ### CellQC ```{r eval=FALSE, cellQC} # Identify mitochondrial genes is_mito <- grepl("^mt-", rownames(mouse_sce)) mouse_sce <- addPerCellQC(mouse_sce, subsets = list(mt = is_mito)) outlier_mt <- isOutlier(mouse_sce$subsets_mt_percent, type = "higher") ``` ```{r eval=FALSE, lib_low} df <- addPerCellQC(mouse_sce, subsets = list(mt = is_mito)) qc.lib.lower <- isOutlier(df$sum, log=TRUE, type="lower") lowUMI<-as.numeric(attr(qc.lib.lower, "thresholds")[2]) if(lowUMI == Inf){ lowUMI <-0 } if(min(df$sum)>= lowUMI){ print("Filter for low UMI counts are too permissive. Apply manual set threshold. Default is 100") qc.lib.lower<- df$sum<20 lowUMI<-20 }else{ print("Calculated thresholds seem to filter for low UMI counts") } ``` ```{reval=FALSE, } qc.lib.low <- df$sum < 3000 ``` ```{r eval=FALSE, lib_high} qc.lib.higher <-isOutlier(df$sum, log=TRUE, type="higher") higherUMI<-as.numeric(attr(qc.lib.higher, "thresholds")[2]) if(max(df$sum)<= higherUMI){ print("Filter for high UMI counts too permissive. Apply manual set threshold.") qc.lib.higher<- df$sum > 30000 higherUMI<-30000 }else{ print("Calculated thresholds seem to filter for high UMI counts") } ``` ```{r eval=FALSE} attr(qc.lib.higher, "thresholds") ``` ```{r eval=FALSE, epxr_low} qc.nexprs.lower <- isOutlier(df$detected, log=TRUE, type="lower") lowerFeatures<-as.numeric(attr(qc.nexprs.lower, "thresholds")[2]) if(min(df$detected)>= lowerFeatures | lowerFeatures < 10){ print("Filter for high gene counts too permissive. Apply manual set threshold.") qc.nexprs.lower<-(df$detected)<10 lowerFeatures<-10 }else{ print("Calculated thresholds seem to filter for high gene counts") } ``` ```{r eval=FALSE} attr(qc.nexprs.lower, "thresholds") ``` ```{r eval=FALSE, expr_high} qc.nexprs.higher <- isOutlier(df$detected, log=TRUE, type="higher") higherFeatures<-as.numeric(attr(qc.nexprs.higher, "thresholds")[2]) if(max(df$detected)<= higherFeatures){ print("Filter for high gene counts too permissive. Apply manual set threshold.") qc.nexprs.higher<-(df$detected)> 8000 higherFeatures<-8000 }else{ print("Calculated thresholds seem to filter for high gene counts") } ``` ```{r eval=FALSE} attr(qc.nexprs.higher, "thresholds") ``` ```{r eval=FALSE} # total outlier <- qc.lib.low | qc.nexprs.lower | qc.lib.higher | qc.nexprs.higher | outlier_mt mouse_sce$outlier <- outlier ``` ### Subset data based on QC ```{r eval=FALSE} filter_sce <- mouse_sce[,!mouse_sce$outlier] ``` ### Normazlise data ```{r eval=FALSE} int_mus <- as.Seurat(filter_sce) ``` ### Populate metadata ```{r eval=FALSE} int_mus@meta.data$ident <- NULL int_mus@meta.data$label <- NULL # 10x kit version int_mus@meta.data$Chemistry <- "v2" ``` ```{r eval=FALSE} int_mus <- NormalizeData(int_mus, normalization.method = "LogNormalize", scale.factor = 10000) ``` ### Find Variable Features ```{r eval=FALSE} int_mus <- FindVariableFeatures(int_mus, selection.method = "vst", nfeatures = 2000) ``` ### Scale Data ```{r eval=FALSE} all_genes <- rownames(int_mus) int_mus <- ScaleData(int_mus, features = all_genes) ``` ### Dimensionality Reduction ```{r eval=FALSE} int_mus <- RunPCA(int_mus, features = VariableFeatures(object = int_mus), npcs = 20) int_mus <- FindNeighbors(int_mus, dims = 1:15) int_mus <- FindClusters(int_mus, resolution = 0.8) int_mus <- RunUMAP(int_mus, dims = 1:15) ``` ```{r eval=FALSE} DimPlot(int_mus, group.by = "RNA_snn_res.0.8", cols = mycoloursP, pt.size = 0.8, label = TRUE) & NoLegend() ``` ### Annotate metadata ```{r eval=FALSE} int_mus@meta.data$ident <- NULL ``` ```{r eval=FALSE} Idents(int_mus) <- "RNA_snn_res.0.8" int_mus@meta.data$ClusterID <- int_mus@meta.data$RNA_snn_res.0.8 current.ids <- c(0,1,2,3 ,4,5,6,7,8,9,10,11) new.ids <- c("m.mOL1", "m.mOL2", "m.OPC1", "m.CyP1", "m.mOL3", "m.Outer_Radial_Glia", "m.Astrocyte", "m.COP", "m.OPC2", "m.Cyp2", "m.Pericyte", "m.Neurons") int_mus@meta.data$ClusterID <- plyr::mapvalues(x = int_mus@meta.data$ClusterID, from = current.ids, to = new.ids) ``` ```{r eval=FALSE} head(int_mus@meta.data, 2) ``` ```{r eval=FALSE} saveRDS(int_mus, here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "PrimaryCulturedMouseOL_with_HumanGeneAnnotation")) ``` # Integration with RC17_ol ### Load data ```{r} RC17_ol <- readRDS(here("data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds")) int_mus <- readRDS(here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "PrimaryCulturedMouseOL_with_HumanGeneAnnotation")) ``` ```{r} DimPlot(RC17_ol, group.by = "ClusterID", cols = mycoloursP[6:40], pt.size = 0.8, label = TRUE) & NoLegend() DimPlot(int_mus, group.by = "ClusterID", cols = mycoloursP, pt.size = 0.8, label = TRUE) & NoLegend() ``` ### Subset mouse oligodendroglia for non-oligo clusters ```{r} Idents(int_mus) <- "ClusterID" int_mus@meta.data$Dataset <- "Primary_Mouse_Cultured_Oligodendroglia" int_mus <- subset(int_mus, idents = c("m.mOL1", "m.mOL2", "m.mOL3", "m.OPC1", "m.OPC2", "m.COP", "m.CyP1", "m.Cyp2")) ``` ## CCA ```{r eval=FALSE, Integrate_Datasets} # Create a list of objects srt_list <- list(RC17_ol, int_mus) # normalize and identify variable features for each dataset independently srt_list<- lapply(X = srt_list, FUN = function(x) { x <- NormalizeData(x) x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) }) # select features that are repeatedly variable across datasets for integration var_features <- SelectIntegrationFeatures(object.list = srt_list) # create anchors to integrate the datasets anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features) # create an 'integrated' data assay ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20) ``` ```{r eval=FALSE, Run_Standard_Workflow} DefaultAssay(ol_comb) <- "integrated" Idents(ol_comb) <- "Dataset" # Scale data all_genes <- rownames(ol_comb) ol_comb <- ScaleData(ol_comb, features = all_genes) # Dimentionality reduction ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE) # Choose the mumber of PCs ElbowPlot(ol_comb, ndims = 50) #20 # Clustering ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20) ol_comb <- FindClusters(ol_comb, resolution = 0.5) ``` ```{r eval=FALSE} saveRDS(ol_comb, here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "RC17_ol_Cultured_Mouse_ol.rds")) ``` ```{r} ol_comb <- readRDS(here("data", "Processed", "Integrations", "PrimaryCulturedRodentOligodendroglia", "RC17_ol_Cultured_Mouse_ol.rds")) ``` ## Visualization ```{r fig.height=4, fig.width=8} DimPlot(ol_comb, pt.size = 0.8, cols = c(mycoloursP[6], mycoloursP[45]), group.by = "Dataset") ``` ```{r fig.height=4, fig.width=10} DimPlot(ol_comb, pt.size = 0.8, cols = c(mycoloursP[6], mycoloursP[45]), group.by = "Dataset", split.by = "Dataset") & NoLegend() ``` ```{r fig.height=4, fig.width=9} DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID") ``` ```{r fig.height=4, fig.width=10} DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "Dataset", label = FALSE) & NoLegend() DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "Dataset", label = TRUE, label.size = 3) & NoLegend() ``` ## Label Transfer ```{r} lt_anchors <- FindTransferAnchors(reference = RC17_ol , query = int_mus, dims = 1:30) predictions <- TransferData(anchorset = lt_anchors, refdata = RC17_ol$ClusterID, dims = 1:30) lt_int_mus <- AddMetaData(int_mus, metadata = predictions) ``` ```{r fig.width=10, fig.height=6, fig.fullwidth=TRUE} DimPlot(lt_int_mus, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[3:40], ncol = 2, pt.size = 1) ``` # Correlation between ol ID and predicted ID ```{r} met_dat <- lt_int_mus@meta.data cor_df <- data.frame(orig_id = met_dat$ClusterID, pred_id = met_dat$predicted.id) cor_df$pred_id <- as.factor(cor_df$pred_id) cor_df$pred_id <- factor(cor_df$pred_id, levels = c("OPC_1", "pri-OPC", "OPC_2", "MAO", "OAPC", "OLIGO_1", "CyP_1", "Cyp_2", "OLIGO_2", "OPC_3")) cor_matr <- matrix(data= 0, nrow = length(levels(cor_df$orig_id)), ncol = length(levels(cor_df$pred_id))) rownames(cor_matr) <- levels(cor_df$orig_id) colnames(cor_matr) <- levels(cor_df$pred_id) for(i in 1: nrow(cor_df)){ name1 <- as.character(cor_df[i, 1]) name2 <- as.character(cor_df[i, 2]) cor_matr[name1, name2] <- cor_matr[name1, name2] + 1 } max(cor_matr) centr_cor_matr <- cor_matr/ max(cor_matr) corrplot(centr_cor_matr) ``` ```{r} sessionInfo() ```