Nourhan Abdelfattah 2024-11-8
library(easypackages)
MyPackages<-c("dplyr","ggplot2","ggpubr","gridExtra","viridis","egg","presto","ComplexHeatmap",
"Seurat","harmony","cowplot","patchwork","stringr", "ggmin","SingleCellExperiment","CellChat")
libraries(MyPackages)
ObjName= "E0771 "
Subset="All_Clusters"
resolution= 1
#We first generate the SeuratObjects for each sample and we run Doublet finder
Files= c("Ctrl_1_Count","KO_1_Count","KO_3_Count" )
for(i in 1:length(Files)){
print(Files[i])
MYSC=Read10X(data.dir = paste0(RobjDirectory,"Archive/",Files[i],"/filtered_feature_bc_matrix/"))
SeuratObjMYSC <- CreateSeuratObject(counts = MYSC, min.features = 100,project = Files[i])
SeuratObjMYSC <- NormalizeData(SeuratObjMYSC, normalization.method = "LogNormalize",
scale.factor =10000)%>%
FindVariableFeatures(selection.method = "vst", nfeatures =2000)%>%
ScaleData()%>%
RunPCA(features = VariableFeatures(object = SeuratObjMYSC))
library(DoubletFinder)
## pK Identification (no ground-truth)
sweep.res.list <- paramSweep_v3(SeuratObjMYSC, PCs = 1:10, sct = FALSE)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)
df=as.data.frame(bcmvn)
## Homotypic Doublet Proportion Estimate
homotypic.prop <- modelHomotypic(SeuratObjMYSC@meta.data$Clusters)
SeuratObjMYSC@meta.data$ClusteringResults
nExp_poi <- round(0.075*nrow(SeuratObjMYSC@meta.data))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
## Run DoubletFinder with varying classification stringencies
SeuratObjMYSC <- doubletFinder_v3(SeuratObjMYSC, PCs = 1:10, pN = 0.25, pK =
as.numeric(df$pK[df$BCmetric==max(df$BCmetric)]),
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
SeuratObjMYSC <- doubletFinder_v3(SeuratObjMYSC, PCs = 1:10, pN = 0.25, pK =
as.numeric(df$pK[df$BCmetric==max(df$BCmetric)]),
nExp = nExp_poi.adj, reuse.pANN = colnames(SeuratObjMYSC@meta.data)[5], sct = FALSE)
colnames(SeuratObjMYSC@meta.data)[4]="pANN"
colnames(SeuratObjMYSC@meta.data)[5]="DoubletStatus"
assign(Files[i],SeuratObjMYSC)
}
Merging and Annotating the Samples
c("Ctrl_1_Count","KO_1_Count","KO_3_Count" )
SeuratObj <- merge(x =Ctrl_1_Count, y = c(KO_1_Count,KO_3_Count), add.cell.ids = c("Ctrl_1_Count","KO_1_Count","KO_3_Count" ))
SeuratObj$Sample <- NA
SeuratObj$Sample[which(str_detect(SeuratObj$orig.ident, "Ctrl_1_Count"))] <- "Ctrl"
SeuratObj$Sample[which(str_detect(SeuratObj$orig.ident, "KO_1_Count"))] <- "KO_1"
SeuratObj$Sample[which(str_detect(SeuratObj$orig.ident, "KO_3_Count"))] <- "KO_3"
SeuratObj$CellLine="E0771"
SeuratObj$Mousegenptype="B6"
SeuratObj$SampleType <- NA
SeuratObj$SampleType[which(str_detect(SeuratObj$orig.ident, "Ctrl_1_Count"))] <- "Control"
SeuratObj$SampleType[which(str_detect(SeuratObj$orig.ident, "KO_1_Count"))] <- "Foxm1_KO_1"
SeuratObj$SampleType[which(str_detect(SeuratObj$orig.ident, "KO_3_Count"))] <- "Foxm1_KO_3"
Filtering low quality cells
#Calculate mitoRatio and RiboRatio
SeuratObj$log10GenesPerUMI <- log10(SeuratObj$nFeature_RNA) / log10(SeuratObj$nCount_RNA)
SeuratObj$percent.mt <- PercentageFeatureSet(SeuratObj, pattern = "^mt-")
SeuratObj$mitoRatio <- SeuratObj@percent.mt / 100
SeuratObj$percent.Rp <- PercentageFeatureSet(SeuratObj, pattern = "^Rp[ls]")
SeuratObj$riboRatio <- SeuratObj@percent.Rp / 100
SeuratObj=subset(SeuratObj,subset = DoubletStatus == "Singlet"&
nFeature_RNA > 200&
percent.mt <15))
Dimension reduction and clustering
SeuratObj <- NormalizeData(SeuratObj, normalization.method = "LogNormalize", scale.factor = 10000)%>%
FindVariableFeatures( selection.method = "vst", nfeatures = 2000)
s.genes=read.csv("../mouse sgenes.csv")
g2m.genes=read.csv("../mouse g2mgenes.csv")
SeuratObj <- CellCycleScoring(SeuratObj, s.features = s.genes$x, g2m.features = g2m.genes$x, set.ident = TRUE)%>%
ScaleData(vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(SeuratObj))%>%
RunPCA(features = VariableFeatures(object = SeuratObj))
SeuratObj <- RunHarmony(SeuratObj, "Sample")
#ElbowPlot(SeuratObj,ndims = 50,reduction = "harmony")
dimz=1:40
SeuratObj <- FindNeighbors(SeuratObj,reduction = "harmony", dims = dimz)
SeuratObj <- RunUMAP(SeuratObj, reduction = "harmony",dims= dimz)
SeuratObj <- FindClusters(SeuratObj, resolution = resolution)
Renaming Clusters
SeuratObj$Cluster=NA
#Rename Clusters
clusters=levels(as.factor(SeuratObj@meta.data$seurat_clusters))
for(j in 1:length(clusters)){
if(j<10){
SeuratObj$Cluster[SeuratObj$seurat_clusters==j-1]=paste0("C0",j)
}else{
SeuratObj$Cluster[SeuratObj$seurat_clusters==j-1]=paste0("C",j)
}
}
Identifying differentially expressed markers for cell annotation
library(presto)
markers <- wilcoxauc(SeuratObj , 'Cluster',assay = "scale.data")
write.csv(markers,paste0(OutputDirectory,ObjName,Subset," presto Cluster markers ","res",resolution,".csv"))
marker.list=list(Anti_Apoptosis = c("Bcl11b", "Bcl2l1", "Bag3", "Bag4", "Birc3",
"Gadd45b"), `B1 B` = c("Ms4a1", "Sspn", "Itgb1", "Epha4", "Col4a4",
"Prdm1", "Irf4", "Cd38", "Xbp1", "Pax5", "Bcl11a", "Blk"), Basophil = c("Itga2",
"Cd9", "Clec12a"), CAFs = c("Dcn", "Col1a1", "Lum", "Col1a2",
"Sfrp2", "Col3a1", "Apod", "Postn", "Cthrc1", "Mmp11", "Col6a3",
"Ccn2", "Ctsk", "C1s1", "Col6a2", "Fn1", "Ccdc80", "Sparc", "Rarres2",
"Cxcl14", "Col6a1", "Sfrp4", "Aebp1", "Bgn", "Ptgds", "Serpinf1",
"Mmp2", "Cfd", "Timp1", "Fbln1", "Vcan", "C1ra", "C3", "Mfap4",
"Cxcl12", "Ccn1", "Col5a2", "Mxra8", "Islr", "Igfbp4", "Gsn",
"Serping1", "Igf2", "Igf1", "Pcolce", "Meg3", "Spon2", "Thy1",
"Cald1", "Igfbp6", "Mfap2", "Aspn", "Lgals1", "Dpt", "Timp3",
"Thbs2", "Fstl1", "Nnmt", "Lrp1", "Col5a1", "Htra1", "Col14a1",
"Col12a1", "Timp2", "Fbln2", "Srpx", "Pdgfrl", "Olfml3", "Efemp1",
"Mmp14", "Laptm4a", "Gem", "Ppic", "Igfbp7", "Egr1", "Fbn1",
"Cd63", "Prrx1", "Pmp22", "Htra3", "Tmem176b", "Tnfaip6", "Mdk",
"Cdh11", "Comp", "Clec11a", "Col10a1", "Fgf7", "Palld", "Efemp2",
"Mfap5", "Fbln5", "Cyp1b1", "Serpinh1", "Lgals3bp", "Col8a1",
"Plpp3", "Cfh", "Cilp", "Fap", "Rcn3", "Podn", "Nbl1", "Prss23",
"Eln", "Sulf1", "Tmem176a", "Id3", "Lhfp", "Col11a1", "Cst3",
"Crispld2", "F2r", "Ifitm3", "Gpnmb", "Adam12", "Tcf4", "Sod3",
"Ggt5"), `CD14+ Mono` = c("Fcna", "Cd14", "S100a8", "S100a9"),
`CD16+ Mono` = c("Tcf7l2", "Fcgr3", "Lyn", "Lst1", "Hk3"),
`CD4+ T activated` = c("Cd4", "Il7r", "Trbc2", "Itgb1"),
`CD4+ T Naïve` = c("Cd4", "Il7r", "Trbc2", "Ccr7"), `CD8+ T` = c("Cd8a",
"Cd8b1", "Gzmk", "Gzma", "Ccl5", "Gzmb", "Gzmc", "Gzma"),
cDC1 = c("Clec9a", "Cadm1", "Cpne3", "Dnase1l3", "Wdfy4"),
cDC2 = c("Ccr7", "Fscn1", "Lamp3", "Marcksl1"), cDC3 = c("Clec10a",
"Fcer1a", "Ppa1"), `E0771 markers` = c("Pcm1", "Map1b", "Hspg2",
"Serpine1", "Scd2", "Nrp2", "Nedd4", "Tead1", "Eprs", "Hspa9",
"Myc", "Ubr4", "Prrc2a", "Flnb", "Tcf20"), Endothelial = c("Ackr1",
"Fabp4", "Plvap", "Ramp2", "Vwf", "Aqp1", "Cldn5", "Sparcl1",
"Gng11", "Pecam1", "Eng", "Egfl7", "Hspg2", "Ramp3", "A2m",
"Spry1", "Clec14a", "Rbp7", "Emcn", "Igfbp7", "Adgrl4", "Id1",
"Npdc1", "Esam", "Calcrl", "Cav1", "Igfbp3", "Ifi27", "Rnase1",
"Cd34", "Cavin2", "Ifitm3", "Igfbp4", "Fkbp1a", "Tm4sf1",
"Plpp1", "Hyal2", "Slc9a3r2", "Crip2", "Pdlim1", "Col15a1",
"Col4a1", "Epas1", "Prcp", "Palmd", "Emp1", "Cd36", "Cd93",
"Tcf4", "Socs3", "Cavin1", "Col4a2", "Flt1", "Pdk4", "Cyyr1",
"Id3", "Bcam", "Mctp1", "Lmcd1", "Adamts1", "Sele", "Adamts9",
"Tgfbr2", "Cd59a", "Rcan1", "Edn1", "Tspan7", "Stc1", "Il33",
"Icam2", "Ets2", "Arhgap29", "Gsn", "Cdh5", "Podxl", "Ptprb",
"Mgll", "Sptbn1", "Mmrn2", "Stom", "Adam15", "Jam2", "Sox17",
"Mgst2", "S1pr1", "Thbd", "Vwa1", "Nnmt", "Vim", "Sparc",
"Clu", "App", "Dusp23", "Itga6", "Heg1", "Adcy4", "Nrn1",
"Ldb2", "Fabp5", "Mall", "Nrp1"), Erythroblast = c("Mki67",
"Hba-a1", "Hba-a2"), `Fatty acid metabolism` = c("Acaa1a",
"Acadm", "Acadvl", "Acsl5", "Aldh16a1", "Aldh9a1", "Ech1",
"Echs1", "Hadha", "Hsd17b10", "Slc27a2", "Decr1", "Pex16",
"Scp2"), Fibroblasts = c("Fbln1", "Dcn", "Lum", "C1s1", "C1ra",
"Slit2", "Col1a1", "Col1a2", "Col3a1", "Col6a3", "Aspn",
"Ltbp1", "Itgbl1", "Cald1", "Postn", "Bgn", "Sfrp2"), `G/M prog` = c("Mpo",
"Bcl2", "Kcnq5", "Csf3r"), Glycolysis = c("Slc2a3", "Slc2a8",
"Pfkfb3", "Aldoa", "Eno1", "Gapdh", "Gpi1", "Pgam1", "Pgk1",
"Pkm", "Tpi1", "Gyg", "Mdh1", "Mdh2", "Ldha", "Ldhb", "Adh5",
"Idh2"), Hypoxia = c("Ackr3", "Adm", "Adora2b", "Ak4", "Akap12",
"Aldoa", "Aldob", "Aldoc", "Ampd3", "Angptl4", "Ankzf1",
"Anxa2", "Atf3", "Atp7a", "B3galt6", "B4galnt2", "Bcan",
"Bcl2", "Bgn", "Bhlhe40", "Bnip3l", "Brs3", "Btg1", "Car12",
"Casp6", "Cav1", "Cavin1", "Cavin3", "Ccn1", "Ccn2", "Ccn5",
"Ccng2", "Cdkn1a", "Cdkn1b", "Cdkn1c", "Cdkn1c", "Chst2",
"Chst3", "Cited2", "Col5a1", "Cp", "Csrp2", "Cxcr4", "Dcn",
"Ddit3", "Ddit4", "Dpysl4", "Dtna", "Dusp1", "Edn2", "Efna1",
"Efna3", "Egfr", "Eno1", "Eno2", "Eno3", "Errfi1", "Ets1",
"Ext1", "F3", "Fam162a", "Fbp1", "Fos", "Fosl2", "Foxo3",
"Gaa", "Galk1", "Gapdh", "Gapdhs", "Gbe1", "Gck", "Gcnt2",
"Gcnt2", "Glrx", "Gpc1", "Gpc3", "Gpc4", "Gpi1", "Grhpr",
"Gys1", "Has1", "Hdlbp", "Hexa", "Hk1", "Hk2", "Hmox1", "Hoxb9",
"Hs3st1", "Hspa5", "Ids", "Ier3", "Ier3", "Ier3", "Ier3",
"Ier3", "Ier3", "Ier3", "Igfbp1", "Igfbp3", "Il6", "Ilvbl",
"Inha", "Irs2", "Isg20", "Jmjd6", "Jun", "Kdelr3", "Kdm3a",
"Kif5a", "Klf6", "Klf7", "Klhl24", "Lalba", "Large1", "Ldha",
"Ldha", "Ldhc", "Lox", "Lxn", "Maff", "Map3k1", "Mif", "Mif",
"Mxi1", "Myh9", "Nagk", "Ncan", "Ndrg1", "Ndst1", "Ndst2",
"Nedd4l", "Nfil3", "Noct", "Nr3c1", "P4ha1", "P4ha2", "Pam",
"Pck1", "Pdgfb", "Pdk1", "Pdk3", "Pfkfb3", "Pfkl", "Pfkp",
"Pgam2", "Pgf", "Pgk1", "Pgm1", "Pgm2", "Phkg1", "Pim1",
"Pklr", "Pklr", "Pkp1", "Plac8", "Plaur", "Plin2", "Pnrc1",
"Ppargc1a", "Ppfia4", "Ppp1r15a", "Ppp1r3c", "Prdx5", "Prkca",
"Pygm", "Rbpj", "Rora", "Rragd", "S100a4", "Sap30", "Scarb1",
"Sdc2", "Sdc3", "Sdc4", "Selenbp1", "Serpine1", "Siah2",
"Slc25a1", "Slc2a1", "Slc2a3", "Slc2a5", "Slc37a4", "Slc37a4",
"Slc37a4", "Slc6a6", "Srpx", "Stbd1", "Stc1", "Stc2", "Sult2b1",
"Tes", "Tgfb3", "Tgfbi", "Tgm2", "Tiparp", "Tktl1", "Tmem45a",
"Tnfaip3", "Tpbg", "Tpbg", "Tpd52", "Tpi1", "Tpst2", "Ugp2",
"Vegfa", "Vhl", "Vldlr", "Wsb1", "Xpnpep1", "Zfp36"), `ID2-hi myeloid prog` = c("Cd14",
"Id2", "Vcan", "S100a9", "Clec12a", "Klf4", "Plaur"), `Lymph prog` = c("Vpreb1",
"Mme", "Ebf1", "Ssbp2", "Bach2", "Cd79b", "Ighm", "Pax5",
"Prkce", "Dntt", "Igll1"), M2_Macs = c("Ms4a6d", "Cd163",
"Stab1", "Slc1a3", "Colec12", "F13a1", "Mrc1", "Tlr2", "Cd163l1",
"Slco2b1"), Macs = c("Spic", "Fabp3", "Cd5l", "Ccl3", "C1qc",
"C1qb", "Fabp4", "C1qa", "Apoe", "Selenop"), Mast = c("Kit",
"Tpsab1", "Cpa3", "Clu"), `MK/E prog` = c("Itga2b", "Ryr3"
), Naïve = c("Il7r", "Ccr7", "Sell", "Foxo1", "Klf2", "Klf3",
"Lef1", "Tcf7", "Actn1", "Foxp1"), `Naïve CD20+ B` = c("Ms4a1",
"Il4ra", "Ighd", "Fcrl1", "Ighm"), Neutrophils = c("Cxcl10",
"Ifit1", "Rsad2", "Stat1"), NK = c("Nkg7", "Cd247", "Grik4",
"Fcer1g", "Tyrobp", "Klrg1", "Fcgr3"), `Normal Epithelial` = c("Krt14",
"Krt17", "Ltf", "Krt15", "Ptn", "Mgp", "Tacstd2", "Mmp7",
"Scgb3a1", "Krt5", "Cryab", "Cxcl2", "Saa1", "Cldn4", "Wfdc2",
"Mylk", "Tm4sf1", "Sfrp1", "Azgp1", "Dst", "Krt8", "Csrp1",
"Actg2", "Krt7", "Tagln", "Clu", "Pdlim3", "Sfn", "Maff",
"Slpi", "Ccl28", "Fbxo32", "Perp", "Krt18", "Cnn1", "Cxcl14",
"Ccn1", "Rcan1", "Acta2", "Pip"), Normoblast = c("Slc4a1",
"Slc25a37", "Hba-a2", "Hba-a1", "Tfrc"), `Oxidative phosphorylation` = c("Atp6v1h",
"Atp2b4", "Atp6v1g3", "Atp1b1", "Atp1a4", "Atp1a2", "Atp5c1",
"Atp5g3", "Atp8b4", "Atp9a", "Atp5e", "Atp11b", "Atp8b2",
"Atp1a1", "Atp5f1", "Atp6v0d2", "Atp8b5", "Atp6v1g1", "Atpaf1",
"Atp6v0b", "Atpif1", "Atp13a2", "Atp8a1", "Atp5k", "Atp2a2",
"Atp6v0a2", "Atp5j2", "Atp6v1f", "Atp6v1fnb", "Atp6v0a4",
"Atp6v0e2", "Atp6v1b1", "Atp2b2", "Atp6v1e1", "Atp1a3", "Atp4a",
"Atp10a", "Atp2a1", "Atp11a", "Atp4b", "Atp7b", "Atp6v1b2",
"Atp13a1", "Atp6v0d1", "Atp2c2", "Atp5l", "Atp1b3", "Atp2c1",
"Atp5d", "Atp8b3", "Atp2b1", "Atp23", "Atp5b", "Atp10b",
"Atpaf2", "Atp1b2", "Atp2a3", "Atp5g1", "Atp6v0a1", "Atp5h",
"Atp6v1c2", "Atp6v1d", "Atp5mpl", "Atp6ap1l", "Atp12a", "Atp8a2",
"Atpsckmt", "Atp6v1c1", "Atp5g2", "Atp13a5", "Atp13a4", "Atp13a3",
"Atp6v1a", "Atp5j", "Atp5o", "Atp6v0c", "Atp6v0e", "Atp6v1g2",
"Atp6v1e2", "Atp8b1", "Atp5a1", "Atp9b", "Atp5md", "Atp6ap2",
"Atp1b4", "Atp11c", "Atp2b3", "Atp6ap1", "Atp7a", "Cox16",
"Cox17", "Cox4i1", "Cox5a", "Cox5b", "Cox6a1", "Cox6b1",
"Cox6c", "Cox7a2", "Cox7b", "Cox7c", "Cox8a", "Cyc1", "mt-Nd1",
"mt-Nd2", "mt-Co1", "mt-Co2", "mt-Atp8", "mt-Atp6", "mt-Co3",
"mt-Nd3", "mt-Nd4l", "mt-Nd4", "mt-Nd5", "mt-Nd6", "mt-Cytb",
"Ndufa1", "Ndufa10", "Ndufa11", "Ndufa12", "Ndufa13", "Ndufa2",
"Ndufa3", "Ndufa4", "Ndufa5", "Ndufa6", "Ndufab1", "Ndufaf3",
"Ndufaf4", "Ndufaf8", "Ndufb10", "Ndufb11", "Ndufb2", "Ndufb3",
"Ndufb4", "Ndufb6", "Ndufb7", "Ndufb8", "Ndufb9", "Ndufc1",
"Ndufc2", "Ndufs2", "Ndufs3", "Ndufs5", "Ndufs6", "Ndufs7",
"Ndufs8", "Ndufv1", "Ndufv2", "Sdha", "Sdhb", "Tcirg1", "Uqcrb",
"Uqcrc1", "Uqcrc2", "Uqcrfs1", "Uqcrh", "Uqcrq", "Surf1",
"Surf4"), pDCs = c("Siglech", "Ccr9", "Ly6d", "Jchain"),
`Plasma cells` = c("Mzb1", "Hsp90b1", "Fndc3b", "Prdm1",
"Igkc", "Jchain"), Plasmablast = c("Xbp1", "Prdm1"), Pro_Apoptosis = c("Bcl2l11",
"Bid", "Bin1", "Bin2", "Bax", "Bak1", "Casp8", "Casp3"),
PVL = c("Acta2", "Rgs5", "Tagln", "Myl9", "Ndufa4l2", "Tpm2",
"Igfbp7", "Cald1", "Sod3", "Ccl19", "Col18a1", "Sparcl1",
"Ccl2", "Myh11", "Lhfp", "Mfge8", "Ppp1r14a", "Rergl", "Igfbp5",
"Notch3", "Ptp4a3", "Cpe", "Mcam", "Tinagl1", "Cav1", "Tpm1",
"Bgn", "Frzb", "Gja4", "Pdgfrb", "Timp3", "Dstn", "Csrp2",
"Map1b", "Mylk", "Cavin3", "Col4a2", "Cox4i2", "Sorbs2",
"Sparc", "Higd1b", "Serping1", "Polr2k", "Adamts1", "Pln",
"Gpx3", "Timp1", "Id3", "Col4a1", "Thy1", "Ifitm3", "Filip1l",
"Pgf", "Cavin1", "A2m", "Crispld2", "Rgs16", "Map3k7cl",
"Cryab", "C1qtnf1", "Nr2f2", "Csrp1", "Tppp3", "Isyna1",
"Eps8", "Steap4", "Tgfb1i1", "Pcolce", "Lmod1", "Egr1", "Col6a2",
"Tsc22d1", "Jag1", "Rasd1", "Flna", "Ntrk2", "Smoc2", "Mef2c",
"Epas1", "Mgst3", "Wfdc1", "Ednra", "Oaz2", "Ptk2", "Angptl4",
"Serpinf1", "Adamts4", "Myc", "Fhl1", "Dkk3", "Bcam", "Sdc2",
"Cnn3", "Cdkn1a", "Synpo2", "Cav2"), `Response to Hypoxia` = c("Alas2",
"Ang", "Arnt2", "Bnip3", "Cd24a", "Chrna4", "Chrna7", "Chrnb2",
"Cldn3", "Crebbp", "Cxcr4", "Egln1", "Egln2", "Ep300", "Epas1",
"Hif1a", "Hsp90b1", "Mt3", "Ciao3", "Nf1", "Pdia2", "Plod1",
"Plod2", "Pml", "Smad3", "Smad4", "Tgfb2", "Vegfa"), `Stress Response` = c("Hikeshi",
"P4hb", "Uba52", "Mapk1", "Rpa2", "Rpa3", "Tnrc6a", "Tnrc6b",
"Tnrc6c", "Nup210", "Rbx1", "Ubb", "Ubc", "Crebbp", "Ets1",
"Lamtor1", "Eed", "Lamtor2", "Hsp90aa1", "Rbbp4", "Lamtor4",
"Lamtor5", "Psmb10", "Cdkn1a", "Anapc11", "Psma1", "Atg4d",
"Psma2", "Wipi2", "Psma3", "Tfdp2", "Psma4", "Atm", "Cycs",
"Psma5", "Wdr45", "Anapc16", "Psma6", "Ehmt1", "Psma7", "Ehmt2",
"Terf1", "Psmc1", "Atr", "Psmc3", "Sod1", "Psmc4", "Psmc5",
"Psmc6", "Psme1", "Atg3", "Ncf1", "Psme2", "Atg5", "Chmp2a",
"Chmp2b", "Gsk3b", "Rps19bp1", "Tpr", "Mdm4", "Elob", "Cdc26",
"Eloc", "St13", "Rps6ka1", "Eef1a1", "Rps6ka3", "Dnajb1",
"Camk2g", "Rps27a", "Txn2", "Prdx1", "Prdx2", "Dnajb6", "Fkbp4",
"Prdx3", "Map2k3", "Mtmr14", "Atg12", "Prdx5", "Rheb", "Prdx6",
"Gstp1", "Atg14", "Rela", "Gpx1", "Vhl", "Hspa1a", "Ube2d1",
"Terf2ip", "Hspa1b", "Ube2d2a", "Txn1", "Ube2d3", "Hsph1",
"Cyba", "Cdk4", "Hsf1", "Cdk6", "Mapkapk3", "Cited2", "Map1lc3b",
"Psmd12", "Hif1a", "Hmga1", "Psmd13", "Mink1", "Psmd14",
"Hsp90ab1", "Ptges3", "Hsbp1", "Nfkb1", "Uvrag", "Cat", "Psmb1",
"Cdkn2c", "Psmb2", "Cdkn2d", "Fos", "Psmb3", "Ccs", "Jun",
"Gabarapl1", "Gabarapl2", "Psmb6", "Kdm6b", "Psmb8", "Psmd2",
"Tnik", "Psmb9", "Psmd3", "Ranbp2", "Atox1", "Sem1", "Psmd6",
"Gabarap", "Psmd7", "Psmf1", "Chmp3", "Stat3", "Psmd8", "Tinf2",
"Psmd9", "Chmp7", "Ccar2", "Hspa4", "Ywhae", "Bag1", "Hspa5",
"Bag3", "Dynll1", "Hspa8", "Rb1cc1", "Bag5", "Anapc5", "Dynll2",
"Vcp", "Dnajc2", "Cabin1", "Cebpb"), `T activation` = c("Cd3e",
"Cd69", "Cd38"), `T Naïve` = c("Lef1", "Ccr7", "Tcf7"),
`Transitional B` = c("Mme", "Cd38", "Cd24a", "Acsm3", "Msi2"
))
SeuratObj <- AddModuleScore(SeuratObj,
features = marker.list,
ctrl = 5,
name = "FunctionScore")
for(i in 1:length(marker.list[-44])){
colnames(SeuratObj@meta.data)[colnames(SeuratObj@meta.data) == paste0("FunctionScore", i)] <- names(marker.list)[i]
}
Assign cell types based on marker expression
Assign= list(Tcells = c("C14", "C19", "C10", "C23", "C09", "C12"), NKcells = "C01",
Tumor = c("C16", "C04", "C07", "C06", "C17", "C13"), BCells = "C22",
Neutrophils = "C03", Monocytes = "C08", Macs = c("C15", "C05",
"C30"), DCs = c("C18", "C11", "C24", "C20"), Mastcells = "C25",
Vasc = "C21", Myeloid_Prog = "C29", CAFs = "C26", Blood = "C28",
Plasmablast = "C27", Epithelial = "C02")
SeuratObj$Assignment=NA
for(i in 1:length(Assign)){
SeuratObj$Assignment[SeuratObj$Cluster %in% Assign[[i]]]=names(Assign[i])
}
CNV prediction to confirm malignant vs normal cells
library(infercnv)
counts_matrix = as.matrix(SeuratObj@assays$RNA@counts[,colnames(SeuratObj)])
metadatasub=SeuratObj@meta.data[,"Assignment",drop=F]
genes=read.delim("../gene_positions.txt",row.names = 1,header = F)
colnames(genes)=c("C_CHR", "C_START", "C_STOP")
features=read.delim(paste0("../filtered_feature_bc_matrix/features.tsv.gz"),row.names = 1,header = F)
featuressub=features[features$V2 %in%row.names(SeuratObj),]
genessub=genes[match(row.names(featuressub),row.names(genes)),]
featuressub$V2=make.unique(featuressub$V2)
row.names(genessub)=featuressub$V2
genessub=genessub[row.names(SeuratObj),];dim(genessub)
dir="infCNV/"
dir.create(dir)
options(scipen = 100)
library(infercnv)
infercnv_obj = CreateInfercnvObject(raw_counts_matrix=counts_matrix,
annotations_file=metadatasub,
delim="\t",
gene_order_file=genessub,
ref_group_names=c("Tcells","NKcells","Monocytes","Macs"))
options(Seurat.object.assay.version = "v3")
infercnv_obj = infercnv::run(infercnv_obj,scale_data = T,
cutoff=0.1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
out_dir=dir, num_threads = 1,
cluster_by_groups=TRUE,
denoise=TRUE,
HMM=TRUE)
SeuratObj=add_to_seurat(
seurat_obj = SeuratObj,
assay_name = "RNA",
dir,
top_n = 10,
bp_tolerance = 2e+06,
column_prefix = NULL
)
De Novo clustering of T cells
SeuratObjT=subset(SeuratObj,subset= Assignment %in% c("Tcells","NKcells"))
ObjName= "E0771 T&NKcells "
Subset="TCells"
resolution= 1
dimz=1:15
abb="TNC"
T cells dimension reduction and clustering
SeuratObjT <- NormalizeData(SeuratObjT, normalization.method = "LogNormalize", scale.factor = 10000)%>%
FindVariableFeatures( selection.method = "vst", nfeatures = 2000)%>%
ScaleData(features = rownames(SeuratObjT))%>%
RunPCA(features = VariableFeatures(object = SeuratObjT))
SeuratObjT <- RunHarmony(SeuratObjT, "Sample")
#ElbowPlot(SeuratObjT,ndims = 50,reduction = "harmony")
dimz=1:50
resolution=0.2
SeuratObjT <- FindNeighbors(SeuratObjT,reduction = "harmony", dims = dimz)%>%
RunUMAP( reduction = "harmony",dims= dimz)%>%
FindClusters(resolution = resolution)
#Rename Clusters
SeuratObjT$Cluster=NA
clusters=levels(as.factor(SeuratObjT@meta.data$seurat_clusters))
for(j in 1:length(clusters)){
if(j<10){
SeuratObjT$Cluster[SeuratObjT$seurat_clusters==j-1]=paste0("TNC0",j)
}else{
SeuratObjT$Cluster[SeuratObjT$seurat_clusters==j-1]=paste0("TNC",j)
}
}
markers = FindAllMarkers(SeuratObjT,logfc.threshold = 0.5,test.use = "wilcox",only.pos = T )
write.csv(markers,paste0(OutputDirectory,ObjName," - ",Subset," - Cluster Markers - ","res ",resolution,".csv"))
Assigning T cells subsets based on marker expression and ProjecTILs projection
library(ProjecTILs)
ref <- readRDS(paste0("../ref_TILAtlas_mouse_v1.rds"))
query.projected <- make.projection(SeuratObjT, ref=ref, filter.cells = T)
query.projected <- cellstate.predict(ref=ref, query=query.projected)
SeuratObjT$ProjecTILsAssignment = query.projected$functional.cluster
table(SeuratObjT$ProjecTILsAssignment,SeuratObjT$Cluster)
Tgenes=c("Cd3g","Cd8a","Cd4","Tbx21","Ifng","Gata3","Il4","Foxp3","Il2ra","Il10","Gzmb","Prf1","Klrk1",
"Cd69","Mki67","Il2","Il12a","Cd28","Tnfrsf9","Tnfrsf4","Pdcd1","Tigit","Lag3",
"Il7r","Cd44","Sell")
DotPlot(SeuratObjT,group.by = "Assignment",dot.scale = 5 ,features = Tgenes ,scale = T)+scale_colour_viridis_c(option = "plasma")+
ggmin::theme_min()+ RotatedAxis()+theme(axis.title = element_blank())+labs(title=paste("Clusters"))
Assign=list(`Naiive TCells-CD8+` = "TNC09", `Exhasted CD8+ TCells` = "TNC06",
`CD4+ TCells` = "TNC10", `Activated TCells` = "TNC04", `Effector CD4+ TCells` = "TNC07",
Treg = "TNC02", `Proliferating TCells` = "TNC08", doublet = "TNC11",
`Effector Memory` = "TNC14", `CD8+ TCells` = "TNC13", NKcells = c("TNC01",
"TNC03", "TNC05", "TNC12"))
SeuratObjT$Assignment=NA
for(i in 1:length(Assign)){
SeuratObjT$Assignment[SeuratObjT$Cluster %in% Assign[[i]]]=names(Assign[i])
}