# Chimeras / Scripts / Compare_Method1_Method2 / import_and_filter_ddH20_A.R
# Create a Seurat object for ddH2O_A and check the number of UMIs and genes

library(Seurat)
library(here)
library(DropletUtils)


# variables
matrix <- here("outs", "filter_70", "CellRanger-combined", "human", "19880WApool01__ddH2O_A_S3", "outs", "raw_feature_bc_matrix")
counts <- Read10X(matrix)
barcodes_path <- here("outs", "filter_70","barcodes","19880WApool01__ddH2O_A_S3_human_barcodes.txt")

# import counts as a seurat object
ddH2O <- CreateSeuratObject(counts = counts, project = "ddH2O", min.cells = 3, min.features = 200)
ddH2O

head(ddH2O@meta.data)

## filter for only the human barcodes
# import barcodes into a vector
barcodes_df <- read.delim(barcodes_path, header = FALSE)
barcodes_vector <- barcodes_df$V1
# add the "-1" seurat adds to barcodes (careful if importing more than one sample)
barcodes <- paste0(barcodes_vector, "-1")

# subset object for only these cells
ddH2O <- subset(ddH2O, cells = barcodes)

dim(ddH2O)
#old: 15,769   717
#new  17,995   717
sum(ddH2O@meta.data$nCount_RNA)
#old:  4.519.846
#new:  3.985.942 ?
sum(GetAssayData(ddH2O,"counts"))
#new 3985942
sum(ddH2O@meta.data$nFeature_RNA)
# old: 1602103
# new: 1604362


# test if we get the same with sce
sce <- read10xCounts(matrix, version = "auto", col.names = TRUE)
sce <- sce[,barcodes]

dim(sce)
#36,601   717 # they filter genes differently probably, same cells

sum(assay(sce))
# 3.989.439  # same number of UMIs

sum(rowSums(counts(sce) > 0) > 1)
#17112