#! /bin/bash #REQUEST DIRECTORY INPUT FROM USER read -p "Enter directory name (exactly as appears in finder): " direc # read -p "Please enter variant ID: " vnt # read -p "Please enter the forward sfx nucleotide sequence: " SFX # SFX_REV=$(seqkit seq -r "$FSX" ) #SIZE FILTER AND MERGE TO SINGLE FILES cd /home/jankatalinic/Documents/RciVariants_analysis/Analysis/test_nov22/Rci7 cd $direc let OG_NUM=0 READS=$(ls *.fastq) mkdir Mapped cd cd /home/jankatalinic/Documents/RciVariants_analysis/Analysis/test_nov22/Rci7 cd $direc for FILE in $READS do LINE_NUM=$(cat $FILE | wc -l)/4 OG_NUM=$(($OG_NUM + $LINE_NUM)) seqkit seq $FILE -m4000 -M5500 >> "TRIM_FILE".fastq done FWD_LINES=$(cat TRIM_FILE.fastq | wc -l) FWD_COUNT=$(echo "scale=0 ; $FWD_LINES / 4" | bc) #GENERATE METADATA echo "The original files contained:" $OG_NUM "reads." >> "Metadata".txt echo "The new filtered reads file contains:" "$FWD_COUNT" "reads." >> "Metadata".txt cd cd /home/jankatalinic/Documents/RciVariants_analysis/Analysis/test_nov22/Rci7 cd $direc REFS=$(ls ~/"$direc"/*.fasta) for FILE in $REFS do chmod +x $FILE done MAP="map" #FOR-LOOP MAPPING READS TO EACH REFERENCE SEQ - OUTPUT AS SAM FILE for FILE in $REFS do STRIP=$(basename $FILE .fasta) minimap2 -ax map-ont --no-long-join -g100 --secondary=no -Y --sam-hit-only $FILE TRIM_FILE.fastq > ~/"$direc"/Mapped/$STRIP-$MAP.bam done #FILTER FOR BEST MATCHES AND WRITE TO NEW SAM FILE (FWD) cd Mapped MAPPED=$(ls *.bam) FILT="_filtered" mkdir Filtered for FILE in $MAPPED do STRIP=$(basename $FILE .bam) samtools view -h $FILE | grep -E 'AS:i:7[5-9][0-9][0-9]' > ~/"$direc"/Mapped/Filtered/"$STRIP"$FILT.bam done cd cd /home/jankatalinic/Documents/RciVariants_analysis/Analysis/test_nov22/Rci7 #WRITE TEXT FILE WITH SEQUENCE ID AS FILENAME AND REF ID AS STRING (FWD) cd $direc cd Mapped cd Filtered FILTERED=$(ls *_filtered.bam) DUP="duplicated_" sort $FILTERED | uniq -w37 -d >> "duplicates".bam DUPID=$(cat duplicates.bam | cut -f1) echo $DUPID for DOC in $FILTERED do NUM_LINES=$(cat -n $DOC) LINE_NO=$(cat $DOC | wc -l) i=1 while [ "$i" -le "$LINE_NO" ]; do echo "$i" SEQID=$(awk -v var="$i" 'NR==var' $DOC | cut -f1) REFID=$(awk -v var="$i" 'NR==var' $DOC | cut -f3) i=$(($i + 1)) if [[ "$DUPID" == *"$SEQID"* ]]; then echo "$REFID" > "$DUP""$SEQID".txt else echo "$REFID" > "$SEQID".txt fi done done cd cd /home/jankatalinic/Documents/RciVariants_analysis/Analysis/test_nov22/Rci7 echo "Alignment completed."