--- title: "07_Dim_red_Rough_annot" author: "Luise A. Seeker" date: "05/02/2021" output: html_document --- ```{r} library(Seurat) library(ggplot2) library(ggsci) library("scales") library(SingleCellExperiment) library(BiocSingular) library(scDblFinder) library(scran) library(scater) library(RCurl) library(AnnotationHub) ``` Pick colour paletts ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` ```{r} seur_comb <- readRDS("/Users/lseeker/Documents/Work/HumanCellAtlas/splice_control_out/datasets/04_scran_normalised/combined_SCE/combined_seur_norm_raw.RDS") ``` #the dataset has been normalised before, so it is not notmalised again. Instead, variable features are searched: ```{r} seur_comb <- FindVariableFeatures(seur_comb, selection.method = "vst", nfeatures = 2000) ``` # Identify the 10 most highly variable genes ```{r} top10 <- head(VariableFeatures(seur_comb), 10) ``` # plot variable features with and without labels ```{r, fig.width=10, fig.height=5, fig.fullwidth=TRUE} plotA <- VariableFeaturePlot(seur_comb, cols = mycoloursP[4:5]) plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE) plotA + plotB ``` #scale all genes (if there is a batch effect I could add vars to regress here) ```{r} all_genes <- rownames(seur_comb) seur_comb <- ScaleData(seur_comb, features = all_genes) ``` #linear dimensional reduction ```{r} seur_comb <- RunPCA(seur_comb, features = VariableFeatures(object = seur_comb), npcs = 100) ``` #visualise ```{r, fig.width=10, fig.height=8, fig.fullwidth=TRUE} VizDimLoadings(seur_comb, dims = 1:2, reduction = "pca") ``` ```{r} DimPlot(seur_comb, reduction = "pca", cols= mycoloursP[22:40]) ``` ```{r} DimHeatmap(seur_comb, dims = 1, cells = 500, balanced = TRUE) ``` ```{r, fig.width=10, fig.height=55, fig.fullwidth=TRUE} DimHeatmap(seur_comb, dims = 1:50, cells = 500, balanced = TRUE) ``` Dimensional reduction ```{r} ElbowPlot(seur_comb, ndims = 100) ``` Horn's paralell analysis ```{r} comb_sce<- as.SingleCellExperiment(seur_comb) var_feat<-VariableFeatures(seur_comb) set.seed(100010) horn <- PCAtools::parallelPCA(logcounts(comb_sce)[var_feat,], BSPARAM=BiocSingular::IrlbaParam(), niters=10) horn$n var_horn <- horn$original$variance plot(var_horn, type="b", log="y", pch=16, xlab = "Principal components", ylab = "Variance explained") permuted <- horn$permuted for (i in seq_len(ncol(permuted))) { points(permuted[,i], col="grey80", pch=16) lines(permuted[,i], col="grey80", pch=16) } abline(v=horn$n, col="red") ``` ```{r} seur_comb <- FindNeighbors(seur_comb, dims = 1:25) seur_comb <- FindClusters(seur_comb, resolution = 0.5) ``` ```{r} seur_comb <- RunUMAP(seur_comb, dims = 1:25) seur_comb <- RunTSNE(seur_comb, dims = 1:25) #Idents(seur_comb)<-"seurat_clusters" DimPlot(seur_comb, reduction = "umap", label = T, cols = mycoloursP, pt.size = 0.5) ``` ```{r} DimPlot(seur_comb, reduction = "tsne", label = T, cols = mycoloursP, pt.size = 0.5) ``` ```{r, fig.width=7, fig.height=4, fig.fullwidth=TRUE} DimPlot(seur_comb, reduction = "pca", label = F, cols = mycoloursP, pt.size = 2) ``` ```{r} seur_comb <- FindClusters(seur_comb, resolution = 0.8 ) Idents(seur_comb) <- "RNA_snn_res.0.8" DimPlot(object = seur_comb, reduction = "umap", label=T, cols = mycoloursP) DimPlot(object = seur_comb, reduction = "umap", label=F, cols = mycoloursP) ``` ```{r} Idents(seur_comb) <- "RNA_snn_res.0.8" DimPlot(object = seur_comb, reduction = "tsne", label=T, cols = mycoloursP) DimPlot(object = seur_comb, reduction = "tsne", label=F, cols = mycoloursP) ``` ```{r} Idents(seur_comb) <- "RNA_snn_res.0.8" DimPlot(object = seur_comb, reduction = "pca", label=F, cols = mycoloursP) ``` How many individuals contribute to every cluster: ```{r} IDsPerCluster <- as.data.frame(tapply(seur_comb@meta.data$caseNO, seur_comb@meta.data$RNA_snn_res.0.8, function(x) length(unique(x)) )) names(IDsPerCluster) <- "NumberOfIDs" IDsPerCluster$RNA_snn_res.0.8 <- rownames(IDsPerCluster) IDsPerCluster$Cluster <- rownames(IDsPerCluster) IDsPerCluster ``` ```{r} met_data <- seur_comb@meta.data met_data$Cluster <- met_data$RNA_snn_res.0.8 merged_dat <- merge(met_data, IDsPerCluster, by = "Cluster", all = TRUE) merged_dat$Barcode == met_data$Barcode barcode_oder <- met_data$Barcode merged_dat <- merged_dat[match(barcode_oder, merged_dat$Barcode),] merged_dat$Barcode == met_data$Barcode id_vector <- merged_dat$NumberOfIDs seur_comb$CountsPerCluster_res_0_8 <- id_vector #seur_comb@meta.data$Cluster_0_5<-seur_comb@meta.data$RNA_snn_res.0.5 DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[3:40], group.by = "CountsPerCluster_res_0_8") ``` ```{r} seur_comb@meta.data$IdCountGroup <- ifelse(seur_comb@meta.data$CountsPerCluster_res_0_8 < 10, "< 10", ifelse(seur_comb@meta.data$CountsPerCluster_res_0_8 == 20, 20, paste(seur_comb@meta.data$CountsPerCluster_res_0_8))) Idents(seur_comb)<-"IdCountGroup" DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[20:30]) ``` How many samples contribute to each cluster ```{r} samples_per_cluster <- as.data.frame(tapply(seur_comb@meta.data$process_number, seur_comb@meta.data$RNA_snn_res.0.8, function(x) length(unique(x)) )) names(samples_per_cluster) <- "samples_per_cluster" samples_per_cluster$RNA_snn_res.0.8 <- rownames(samples_per_cluster) samples_per_cluster$Cluster <- rownames(samples_per_cluster) samples_per_cluster ``` ```{r} merged_dat <- merge(merged_dat, samples_per_cluster, by = "Cluster", all = TRUE) #merged_dat$Barcode == met_data$Barcode barcode_oder <- met_data$Barcode merged_dat <- merged_dat[match(barcode_oder, merged_dat$Barcode),] merged_dat$Barcode == met_data$Barcode sample_vector <- merged_dat$samples_per_cluster seur_comb$samplesPerCluster_res_0_8 <- sample_vector #seur_comb@meta.data$Cluster_0_5<-seur_comb@meta.data$RNA_snn_res.0.5 DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[8:50], group.by = "samplesPerCluster_res_0_8") ``` ```{r} seur_comb@meta.data$qurtile_sample__per_cl_group seur_comb@meta.data$sample_count_per_cl_group <- ifelse(seur_comb@meta.data$samplesPerCluster_res_0_8 < 10, "< 10", ifelse(seur_comb@meta.data$samplesPerCluster_res_0_8 == 58, "58 (All)", ifelse(between(seur_comb@meta.data$samplesPerCluster_res_0_8, 10, 29), "10 - 29", ifelse(between(seur_comb@meta.data$samplesPerCluster_res_0_8, 30, 39), "30 - 39", ifelse(between(seur_comb@meta.data$samplesPerCluster_res_0_8, 40, 49), "40 - 49", "50 - 57"))))) DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[14:50], group.by = "sample_count_per_cl_group") ``` ```{r} DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[20:30], group.by = "sample_count_per_cl_group", split.by = "sample_count_per_cl_group") ``` Relevel 10 X batch for plotting in order ```{r} seur_comb$X10XBatch <- as.factor(seur_comb$X10XBatch) seur_comb$X10XBatch <- factor(seur_comb$X10XBatch, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")) Idents(seur_comb)<-"X10XBatch" mycolours<- mycoloursP[1:7] DimPlot(seur_comb, reduction = "umap", cols = mycoloursP) ``` ```{r, fig.width=7, fig.height=5, fig.fullwidth=TRUE} Idents(seur_comb)<-"caseNO" DimPlot(seur_comb, reduction = "umap", cols = mycoloursP) ``` ```{r} Idents(seur_comb)<-"SequencingPool" mycolours<- mycoloursP[1:6] DimPlot(seur_comb, reduction = "umap", cols = mycoloursP) ``` ```{r} Idents(seur_comb)<-"gender" mycolours<- mycoloursP[1:2] DimPlot(seur_comb, reduction = "umap", cols = mycoloursP) ``` ```{r} seur_comb@meta.data$AgeGroup<-ifelse(seur_comb@meta.data$Age < 50, "Young", "Old") Idents(seur_comb)<-"AgeGroup" DimPlot(seur_comb, reduction = "umap", cols = mycoloursP[10:11]) ``` ```{r} seur_comb@meta.data$RIN<-ifelse(seur_comb@meta.data$RINvalue == "", "NULL", seur_comb@meta.data$RINvalue) Idents(seur_comb)<-"RIN" DimPlot(seur_comb, reduction = "umap", cols = mycoloursP[6:40]) ``` ```{r} Idents(seur_comb)<-"UMI_QC" mycolours<- mycoloursP[25:26] DimPlot(seur_comb, reduction = "umap", cols = mycolours) ``` ```{r} Idents(seur_comb)<-"CauseOfDeath_category" DimPlot(seur_comb, reduction = "umap", cols = mycoloursP[20:27]) ``` Doublet markers ```{r} DimPlot(seur_comb, reduction = "umap", label = F, group.by = "scDblFinder_class", cols = c(mycoloursP[25] , mycoloursP[26]), pt.size = 0.7) ``` ```{r} DimPlot(seur_comb, reduction = "umap", label = F, group.by = "scDblFinder_class", split.by = "scDblFinder_class", cols = c(mycoloursP[25] , mycoloursP[26]), pt.size = 0.7) ``` ```{r} seur_comb@meta.data$ageSex <- ifelse(seur_comb@meta.data$gender == "M" & seur_comb@meta.data$AgeGroup == "Old", "Old men", ifelse(seur_comb@meta.data$gender == "M" & seur_comb@meta.data$AgeGroup == "Young", "Young men", ifelse(seur_comb@meta.data$gender == "F" & seur_comb@meta.data$AgeGroup == "Young", "Young women", "Old women"))) Idents(seur_comb)<-"ageSex" DimPlot(seur_comb, reduction = "umap", label = F, cols = mycoloursP[15:42]) ``` ```{r} seur_comb <- FindClusters(seur_comb, resolution = 0.1) DimPlot(seur_comb, reduction = "umap", label = T, cols = mycoloursP) ``` The following markers were used for cell types: neurons: SNAP25, STMN2,RBFOX3 and GABRB2; inhibitory neurons: GAD1, GAD2 and SLC32A1; excitatory neurons: SATB2, SLC17A7 and SLC17A6; astrocytes: GJA1, AQP4, GLUL, SOX9, NDRG2, GFAP, ALDH1A1, ALDH1L1 and VIM; endothelial: CLDN5, VTN and ICAM2; pericytes: PDGFRB and NOTCH3 macrophage/microglia: SPI1, MRC1, TMEM119, CX3CR1 and A1F1; OPCs: PDGFRA, CSPG4, GPR17, PTPRZ1, OLIG1, OLIG2, PCDH15, PTGDS, oligodendrocytes: PLP1, CNP, MAG, MOG, MOBP, MBP, SOX10 ```{r} FeaturePlot(seur_comb, features = "SNAP25") FeaturePlot(seur_comb, features = "GABRB2") FeaturePlot(seur_comb, features = "PLP1") FeaturePlot(seur_comb, features = "CNP") FeaturePlot(seur_comb, features = "GJA1") FeaturePlot(seur_comb, features = "AQP4") FeaturePlot(seur_comb, features = "CD74") FeaturePlot(seur_comb, features = "CX3CR1") FeaturePlot(seur_comb, features = "CLDN5") FeaturePlot(seur_comb, features = "ICAM2") FeaturePlot(seur_comb, features = "PDGFRB") FeaturePlot(seur_comb, features = "NOTCH3") FeaturePlot(seur_comb, features = "PDGFRA") FeaturePlot(seur_comb, features = "PCDH15") FeaturePlot(seur_comb, features = "GAD1") FeaturePlot(seur_comb, features = "GAD2") FeaturePlot(seur_comb, features = "SLC32A1") FeaturePlot(seur_comb, features = "SATB2") FeaturePlot(seur_comb, features = "SLC17A7") FeaturePlot(seur_comb, features = "SLC17A6") ``` Above are only a few marker genes. More can be found in a separate markdown file with a respective descriptive name. ```{r} seur_comb$rough_annot <- seur_comb$RNA_snn_res.0.1 Idents(seur_comb) <- "rough_annot" seur_comb <- RenameIdents( seur_comb, "0" = "Oligo", "1" = "RELN+ neurons", "2" = "Endothelial-Pericyte", "3" = "Microglia_Macrophages", "4" = "Neuron_In", "5" = "OPC", "6" = "Neuron_Ex", "7" = "Astrocyte", "8" = "Endothelial-Pericyte", "9" = "Unidendtified", "10" = "Neuron", "11" = "Unidendtified", "12" = "Neuron", "13" = "Unidendtified", "14" = "Neuron" ) ``` ```{r} seur_comb$rough_annot <- Idents(seur_comb) ``` ```{r, fig.width=7, fig.height=6, fig.fullwidth=TRUE} DimPlot(seur_comb, label = T, cols = mycoloursP, repel = T, group.by = "rough_annot") ``` ```{r, fig.width=7, fig.height=6, fig.fullwidth=TRUE} DimPlot(seur_comb, label = F, cols = mycoloursP, group.by = "rough_annot") ``` During our analysis we identified several samples with lower quality based on either their high total nuclei count, low mean UMI count or large proportion of spliced compared to unspliced mRNAs. We will mark those samples in the metadata ```{r} sample_qc_failed <- c(27, 8, 43, 63, 5, 2, 69, 45, 66, 50) seur_comb$sample_qc_failed <- ifelse(seur_comb$process_number %in% sample_qc_failed, "TRUE", "FALSE") ``` ```{r} DimPlot(seur_comb, group.by = "sample_qc_failed", cols = mycoloursP) DimPlot(seur_comb, group.by = "sample_qc_failed", split.by = "sample_qc_failed", cols = c(mycoloursP[6], mycoloursP[5] )) ``` ```{r} DimPlot(seur_comb, group.by = "QC_UMI", split.by = "QC_UMI", cols = mycoloursP[16:17]) ``` ```{r} summary(as.factor(seur_comb$sample_qc_failed)) ``` save annotatedData ```{r} dir.create("/Users/lseeker/Documents/Work/HumanCellAtlas/splice_control_out/datasets/05_Annotated/allCelltypes") saveRDS(seur_comb, "/Users/lseeker/Documents/Work/HumanCellAtlas/splice_control_out/datasets/05_Annotated/allCelltypes/HCA_rough_annotated_all.RDS") ``` Save a master cell metadata that includes all the cellcycle scores and doublet scores etc. and also a file including the barcodes of included oligodendrocytes. ```{r} met_data <- seur_comb@meta.data barcode_df <- data.frame(barcode = oligos$Barcode) write.csv(barcode_df, "/Users/lseeker/Documents/Work/HumanCellAtlas/git_repos/AW_scRNAseq/data/HCA_oligodendroglia_barcodes.csv" ) write.csv(met_data, "/Users/lseeker/Documents/Work/HumanCellAtlas/git_repos/AW_scRNAseq/data/HCA_master_nuclei_metadata.csv" ) ``` ```{r} sessionInfo() ```