#! /bin/bash
# SIZE FILTER AND MERGE TO A SINGLE FILE
# Length-filters every FASTQ file in the barcode directory and appends the
# surviving reads to TRIM_FILE.fastq.
# Sets: OG_NUM    - total number of reads in the input files
#       FWD_COUNT - number of reads in the merged, filtered file
# Read counts are line counts divided by 4 (assumes 4-line FASTQ records).
# Abort if the data directory is missing instead of working in the wrong place.
cd /home/jankatalinic/hdd/RciVariants/Orthogonality_repl1_071222/barcode23 || exit 1
OG_NUM=0
mkdir -p Mapped
# Start every run from an empty merged file; appending to a leftover
# TRIM_FILE.fastq would duplicate the reads of a previous run.
: > TRIM_FILE.fastq
for FILE in *.fastq; do
  # Skip the unexpanded pattern (no inputs present) and the merged output
  # itself, which also matches *.fastq.
  [[ -f "$FILE" && "$FILE" != "TRIM_FILE.fastq" ]] || continue
  LINE_NUM=$(( $(wc -l < "$FILE") / 4 ))
  OG_NUM=$(( OG_NUM + LINE_NUM ))
  # Keep only reads whose length is between 4000 and 5500 bases.
  seqkit seq -m 4000 -M 5500 "$FILE" >> TRIM_FILE.fastq
done
FWD_LINES=$(wc -l < TRIM_FILE.fastq)
FWD_COUNT=$(( FWD_LINES / 4 ))
# GENERATE METADATA
# Append the read counts before and after size filtering to Metadata.txt.
{
  echo "The original files contained:" $OG_NUM "reads."
  echo "The new filtered reads file contains:" "$FWD_COUNT" "reads."
} >> Metadata.txt
# MAP READS TO EACH REFERENCE SEQUENCE
# For every *.fasta file in the current directory, align TRIM_FILE.fastq
# against it with minimap2 (SAM output, map-ont preset) and write the
# coordinate-sorted result to Mapped/<reference name>-map.bam.
for FILE in *.fasta; do
  # Skip the unexpanded pattern when no reference files are present.
  [[ -f "$FILE" ]] || continue
  # NOTE(review): the references are only passed to minimap2 as input, so the
  # execute bit looks unnecessary; kept to preserve existing side effects.
  chmod +x "$FILE"
  STRIP=${FILE%.fasta}
  minimap2 -ax map-ont --no-long-join -g100 --secondary=no -Y --sam-hit-only \
    "$FILE" TRIM_FILE.fastq \
    | samtools sort -o "Mapped/${STRIP}-map.bam" -
  # Without pipefail only the samtools status is visible, so inspect both
  # stages and report a failed mapping instead of continuing silently.
  STAGE_STATUS=("${PIPESTATUS[@]}")
  if (( STAGE_STATUS[0] != 0 || STAGE_STATUS[1] != 0 )); then
    echo "Warning: mapping against $FILE failed" \
      "(minimap2=${STAGE_STATUS[0]}, samtools=${STAGE_STATUS[1]})" >&2
  fi
done
# FILTER FOR BEST MATCHES (FWD)
# For every BAM file in Mapped/, keep the alignment records whose AS tag
# matches the pattern below and write them to
# Mapped/Filtered/<name>_filtered.bam.
# The output is headerless SAM text despite the .bam suffix; the name is kept
# because the next step globs for *_filtered.bam and parses tab-separated text.
cd Mapped || exit 1
mkdir -p Filtered
for FILE in *.bam; do
  # Skip the unexpanded pattern when no BAM files are present.
  [[ -f "$FILE" ]] || continue
  STRIP=${FILE%.bam}
  # The header is not requested: only lines containing an AS:i: tag pass the
  # grep, and the output is parsed as alignment records.
  # NOTE(review): this is a textual match, not a numeric threshold. It accepts
  # AS values starting with 75xx-79xx or 8xxx-9xxx and is not anchored at the
  # end, so 5-digit scores beginning with those digits also pass, while
  # scores from 10000 to 74999 do not. Confirm the intended cut-off.
  samtools view "$FILE" \
    | grep -E 'AS:i:7[5-9][0-9][0-9]|AS:i:[8-9][0-9][0-9][0-9]' \
    > "Filtered/${STRIP}_filtered.bam"
done
# WRITE ONE TEXT FILE PER READ: SEQUENCE ID AS FILENAME, REFERENCE ID AS CONTENT (FWD)
# Reads every *_filtered.bam file (tab-separated alignment text) in
# Mapped/Filtered. For each record, column 3 (reference ID) is written to
# <read ID>.txt, or to duplicated_<read ID>.txt when the read ID is listed in
# duplicates.bam. A later record for the same read overwrites an earlier one.
cd Filtered || exit 1
DUP="duplicated_"
FILTERED=()
for DOC in *_filtered.bam; do
  # Skip the unexpanded pattern when no filtered files are present.
  [[ -f "$DOC" ]] && FILTERED+=("$DOC")
done
# duplicates.bam gets one line per group of records whose first 37 characters
# are identical (assumes a 36-character read ID followed by a tab - TODO
# confirm). It is rebuilt on every run so stale entries do not accumulate, and
# sort is never run without inputs (it would wait on stdin).
if (( ${#FILTERED[@]} > 0 )); then
  sort -- "${FILTERED[@]}" | uniq -w37 -d > duplicates.bam
else
  : > duplicates.bam
fi
DUPID=$(cut -f1 duplicates.bam)
# Left unquoted so the IDs are printed space-separated on a single line.
# shellcheck disable=SC2086
echo $DUPID
# Newline-delimited lookup string: matching "\n<id>\n" is an exact ID match,
# so an ID that is merely a substring of a duplicated ID is not flagged.
DUP_LOOKUP=$'\n'"$DUPID"$'\n'
for DOC in "${FILTERED[@]}"; do
  # Single pass over the file: columns 1 (read ID) and 3 (reference ID).
  cut -f1,3 -- "$DOC" | {
    i=0
    while IFS=$'\t' read -r SEQID REFID; do
      i=$(( i + 1 ))
      # Progress output: record number within the current file.
      echo "$i"
      if [[ "$DUP_LOOKUP" == *$'\n'"$SEQID"$'\n'* ]]; then
        printf '%s\n' "$REFID" > "${DUP}${SEQID}.txt"
      else
        printf '%s\n' "$REFID" > "${SEQID}.txt"
      fi
    done
  }
done
echo "Alignment completed."