Chimeras / Scripts / Human_Mouse_Separation / arrayRunSTARSolo-onlyCB.sh
arrayRunSTARSolo-onlyCB.sh
Raw
#!/bin/sh
#
# Grid Engine array job: runs STARsolo for one sample per array task.
#
# Task N handles the sample named on line N of Data/samples_STARsolo.txt.
# For that sample the script reads the two FASTQ lists
# Data/fastq_lists/<sample>_R1.csv and <sample>_R2.csv, changes into
# Data/STARsolo-onlyCB/<sample>/ and runs STAR there, requesting a
# coordinate-sorted BAM whose only SAM attribute is CB.
#
# Environment read: USER, SGE_TASK_ID, NSLOTS (optional, falls back to 16).
# Exit status: non-zero as soon as any setup step or STAR itself fails.
#
# Not required if it is an array job
## Grid Engine options (lines prefixed with #$)
#$ -N RunStarSolo.sh # job name
#$ -cwd
#$ -l h_rt=10:00:00
#$ -l h_vmem=4G
#$ -pe sharedmem 16
##$ -M YourEmailAddress
##$ -m beas
#$ -t 1-4

# Print an error message on stderr and abort the task with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Initialise the environment modules
. /etc/profile.d/modules.sh

module load igmm/apps/STAR/2.7.8a

# Do not rely on the exit status of "module load" alone (it may not signal
# failure on every installation): confirm the binary is really on PATH.
command -v STAR >/dev/null 2>&1 ||
  die "STAR not found after 'module load igmm/apps/STAR/2.7.8a'"

# Strict mode is enabled only from here on: the site-provided module
# initialisation above is not guaranteed to be clean under -e / -u.
set -eu

# The array index must be a plain number. Outside an array job it is not one,
# and an empty value would turn the sed call below into "sed -n p", which
# prints the whole ID file instead of a single sample name.
case "${SGE_TASK_ID:-}" in
  '' | *[!0-9]*)
    die "SGE_TASK_ID='${SGE_TASK_ID:-}' is not a task number; submit this script as an array job"
    ;;
esac

# Variables
# Common root of every input and output path used below
BASE="/exports/eddie/scratch/$USER/Chimeras"
# ID file: one sample name per line
IDFILE="$BASE/Data/samples_STARsolo.txt"
# Path to the Reference Genome directory
GENOME="$BASE/STAR_Index_Combined"
# white list barcodes
WHITELIST="$BASE/whitelist/3M-february-2018.txt"
# Thread count follows the slots actually granted by the scheduler; 16 matches
# the "-pe sharedmem 16" request above and is used when NSLOTS is not set.
THREADS="${NSLOTS:-16}"

[ -r "$IDFILE" ] || die "cannot read sample ID file: $IDFILE"
[ -d "$GENOME" ] || die "genome index directory not found: $GENOME"
[ -r "$WHITELIST" ] || die "cannot read barcode whitelist: $WHITELIST"

# Assigning SAMPLE variable from the built-in array counter
SAMPLE=$(sed -n "${SGE_TASK_ID}p" "$IDFILE")
[ -n "$SAMPLE" ] || die "no sample name on line $SGE_TASK_ID of $IDFILE"

# Files holding the FASTQ lists for this sample; their contents are handed to
# STAR verbatim (presumably comma-separated FASTQ paths -- confirm).
R1_LIST="$BASE/Data/fastq_lists/${SAMPLE}_R1.csv"
R2_LIST="$BASE/Data/fastq_lists/${SAMPLE}_R2.csv"
[ -r "$R1_LIST" ] || die "cannot read R1 FASTQ list: $R1_LIST"
[ -r "$R2_LIST" ] || die "cannot read R2 FASTQ list: $R2_LIST"

FASTQR1=$(cat "$R1_LIST")
FASTQR2=$(cat "$R2_LIST")
[ -n "$FASTQR1" ] || die "R1 FASTQ list is empty: $R1_LIST"
[ -n "$FASTQR2" ] || die "R2 FASTQ list is empty: $R2_LIST"

# create a directory where to store the results
OUTDIR="$BASE/Data/STARsolo-onlyCB/${SAMPLE}"
mkdir -p "$OUTDIR" || die "cannot create output directory: $OUTDIR"
# No --outFileNamePrefix is passed to STAR, so the working directory decides
# where results go. Abort rather than let every array task write into the
# shared submit directory.
cd "$OUTDIR" || die "cannot change into output directory: $OUTDIR"

# echo Sample
echo "Sample is $SAMPLE"
echo "FASTQR1 path is $FASTQR1"
echo "FASTQR2 path is $FASTQR2"

# Run STARsolo
# Read order is deliberate: the R2 list is given first and the R1 list second.
STAR \
  --runThreadN "$THREADS" \
  --genomeDir "$GENOME" \
  --readFilesIn "$FASTQR2" "$FASTQR1" \
  --soloType CB_UMI_Simple \
  --soloCBwhitelist "$WHITELIST" \
  --soloCellFilter EmptyDrops_CR \
  --clipAdapterType CellRanger4 --outFilterScoreMin 30 \
  --soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts --soloUMIfiltering MultiGeneUMI_CR --soloUMIdedup 1MM_CR \
  --readFilesCommand gunzip -c \
  --soloUMIlen 12 \
  --limitIObufferSize 250000000 \
  --limitOutSJcollapsed 9000000 \
  --outSAMattributes CB \
  --outSAMtype BAM SortedByCoordinate ||
  die "STAR failed for sample $SAMPLE"

# Reached only when STAR exited successfully.
echo "I did it for sample $SAMPLE"