library(Seurat) library(here) library(stringr) # to extract pattern library(dplyr) # manipulate df library(ggplot2) # plotting # Loop through all samples samples <- dir(here("Data", "STARsolo")) # Test different percentages pcts <- c(70, 80, 90) # empty df to hold summaries df_all <- data.frame() for(pct in pcts){ for(sample in samples){ print(paste("pct:",pct,";Sample:", sample)) # path to matrix path_matrix <- here("Data", "STARsolo", sample, "Solo.out", "Gene", "filtered") # read10x expects the genes to be saved in genes.tsv instead of feature.tsv file.rename(here(path_matrix, "features.tsv"), here(path_matrix,"genes.tsv")) # load reads matrix <- Read10X(path_matrix) # inspired from https://bustools.github.io/BUS_notebooks_R/10xv2.html # Divide the umi by the species they mapped to gene_species <- ifelse(str_detect(rownames(matrix), "^mm10_"), "mouse", "human") mouse_inds <- gene_species == "mouse" human_inds <- gene_species == "human" # mark cells as mouse or human cell_species <- tibble( barcode = colnames(matrix), n_mouse_umi = Matrix::colSums(matrix[mouse_inds, ]), n_human_umi = Matrix::colSums(matrix[human_inds, ]), tot_umi = Matrix::colSums(matrix), prop_mouse = n_mouse_umi / tot_umi, prop_human = n_human_umi / tot_umi ) # Classify species based on proportion of UMI, with cutoff of 80% cell_species <- cell_species %>% mutate(species = case_when( prop_mouse > pct/100 ~ "mouse", prop_human > pct/100 ~ "human", TRUE ~ "mixed" )) # create plot and summary p <- ggplot(cell_species, aes(n_human_umi, n_mouse_umi, color = species)) + geom_point(size = 1) + ggtitle(sample) summary <- table(cell_species$species) df_summary <- data.frame(mouse= summary["mouse"], human = summary["human"], mixed = summary["mixed"], row.names = sample) # filter the human and mouse barcodes cell_human <- cell_species %>% filter(cell_species$species == "human") cell_mouse <- cell_species %>% filter(cell_species$species == "mouse") # save # create directories plot_path <- here("outs", paste0("filter_",pct),"Human_Mouse_Separation", "plots") barcodes_path <- here("outs", paste0("filter_",pct), "Human_Mouse_Separation","barcodes") robj_path <- here("Data", "Human_Mouse_Separation", paste0("filter_",pct)) dir.create(here("outs", paste0("filter_",pct))) dir.create(plot_path) dir.create(barcodes_path) dir.create(robj_path) ggsave(filename = here(plot_path, paste0(sample,"_species.pdf")), plot = p) write.table(cell_human$barcode, here(barcodes_path, paste0(sample, "_human_barcodes.txt")), quote = FALSE, row.names = FALSE, col.names = FALSE ) write.table(cell_mouse$barcode, here(barcodes_path, paste0(sample, "_mouse_barcodes.txt")), quote = FALSE, row.names = FALSE, col.names = FALSE ) saveRDS(cell_species, here(robj_path, paste0(sample, "cell_species.rds"))) } # save summaries df_all <- rbind(df_all, df_summary) write.table(df_all, here("outs", paste0("summary_", pct, ".txt")), quote = FALSE) }