#!/bin/bash
#
# Size-filter FASTQ reads, map them against every reference FASTA in the
# working directory, keep the alignments whose AS tag matches AS_PATTERN,
# and write one small text file per read naming the reference it aligned to.
#
# Usage:   <this script> [WORK_DIR]
#   WORK_DIR  directory holding the *.fastq reads and *.fasta references
#             (defaults to DEFAULT_WORK_DIR below).
#
# Requires: seqkit, minimap2, samtools on PATH (plus GNU sort/uniq/cut).
#
# Outputs (all relative to WORK_DIR):
#   TRIM_FILE.fastq                     length-filtered reads, merged
#   Metadata.txt                        read counts before/after filtering
#   Mapped/<ref>-map.bam                sorted alignments per reference
#   Mapped/Filtered/<ref>-map_filtered.bam
#                                       alignment records matching AS_PATTERN;
#                                       despite the extension this is SAM text
#   Mapped/Filtered/duplicates.bam      one record per read ID seen more than
#                                       once (also text)
#   Mapped/Filtered/<read>.txt          reference name for a uniquely hit read
#   Mapped/Filtered/duplicated_<read>.txt
#                                       same, for reads listed in duplicates

set -euo pipefail
shopt -s nullglob   # an unmatched glob expands to nothing, not to itself

readonly DEFAULT_WORK_DIR='/home/jankatalinic/hdd/RciVariants/Orthogonality_repl1_071222/barcode23'
readonly TRIM_FILE='TRIM_FILE.fastq'
readonly METADATA='Metadata.txt'
readonly MIN_LEN=4000
readonly MAX_LEN=5500
readonly MAP='map'
readonly FILT='_filtered'
readonly DUP='duplicated_'

# NOTE(review): this is a *textual* match on the AS tag, not a numeric
# threshold. It accepts 7500-9999, rejects scores of 10000 and above, and
# also accepts longer numbers that merely start with 75..99 (e.g. 75000).
# Confirm that this is the intended window before tightening it.
readonly AS_PATTERN='AS:i:7[5-9][0-9][0-9]|AS:i:[8-9][0-9][0-9][0-9]'

#######################################
# Print a message to stderr and abort.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Abort early if any required external tool is missing.
# Arguments: $@ - command names to look up on PATH
#######################################
require_tools() {
  local tool
  for tool in "$@"; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done
}

#######################################
# Size-filter every input FASTQ into TRIM_FILE and append the read counts
# (before and after filtering) to METADATA.
# Globals: TRIM_FILE, METADATA, MIN_LEN, MAX_LEN (read)
#######################################
filter_and_count_reads() {
  local -a reads=()
  local fastq lines fwd_lines
  local og_num=0

  # The merged output also matches *.fastq; leave it out so that a rerun
  # does not feed the previous result back into itself.
  for fastq in *.fastq; do
    if [[ "$fastq" != "$TRIM_FILE" ]]; then
      reads+=("$fastq")
    fi
  done
  (( ${#reads[@]} > 0 )) || die "no .fastq files found in $PWD"

  : > "$TRIM_FILE"   # start from an empty file; the loop below appends
  for fastq in "${reads[@]}"; do
    lines=$(wc -l < "$fastq")
    og_num=$(( og_num + lines / 4 ))   # read count taken as lines / 4
    seqkit seq -m "$MIN_LEN" -M "$MAX_LEN" "$fastq" >> "$TRIM_FILE"
  done

  fwd_lines=$(wc -l < "$TRIM_FILE")

  # Metadata is deliberately appended: it acts as a running log.
  {
    printf 'The original files contained: %s reads.\n' "$og_num"
    printf 'The new filtered reads file contains: %s reads.\n' \
      "$(( fwd_lines / 4 ))"
  } >> "$METADATA"
}

#######################################
# Map TRIM_FILE against each reference FASTA; one sorted BAM per reference
# is written to Mapped/.
# Globals: TRIM_FILE, MAP (read)
#######################################
map_reads() {
  local -a refs=(*.fasta)
  local ref stem

  (( ${#refs[@]} > 0 )) || die "no .fasta reference files found in $PWD"
  mkdir -p Mapped

  for ref in "${refs[@]}"; do
    stem=${ref%.fasta}
    minimap2 -ax map-ont --no-long-join -g100 --secondary=no -Y \
      --sam-hit-only "$ref" "$TRIM_FILE" \
      | samtools sort -o "Mapped/${stem}-${MAP}.bam"
  done
}

#######################################
# For each BAM in the current directory, keep the records whose text matches
# AS_PATTERN and write them to Filtered/.
# Globals: AS_PATTERN, FILT (read)
#######################################
filter_alignments() {
  local -a mapped=(*.bam)
  local bam stem

  (( ${#mapped[@]} > 0 )) || die "no .bam files found in $PWD"
  mkdir -p Filtered

  for bam in "${mapped[@]}"; do
    stem=${bam%.bam}
    # grep exits 1 when nothing matches; that is a valid (empty) result and
    # must not abort the run under 'set -e -o pipefail'. Real errors (>1)
    # still propagate.
    samtools view -h "$bam" \
      | { grep -E -- "$AS_PATTERN" || [[ $? -eq 1 ]]; } \
      > "Filtered/${stem}${FILT}.bam"
  done
}

#######################################
# In the current directory: collect read IDs that occur more than once across
# all filtered files, then write <read>.txt (or duplicated_<read>.txt)
# containing column 3 of each record. Later records overwrite earlier ones
# for the same read ID.
# Globals: FILT, DUP (read)
#######################################
write_read_assignments() {
  local -a filtered=(*"${FILT}".bam)
  local doc dup_ids dup_lookup seq_id ref_id i

  # Without this guard 'sort' would get no file arguments and read stdin.
  (( ${#filtered[@]} > 0 )) || die "no *${FILT}.bam files found in $PWD"

  # uniq -w37 compares only the first 37 characters of each line
  # (presumably a 36-character read ID plus the tab - TODO confirm).
  sort -- "${filtered[@]}" | uniq -w37 -d > duplicates.bam
  dup_ids=$(cut -f1 duplicates.bam)
  printf '%s\n' "${dup_ids//$'\n'/ }"

  # Newline-delimit the list so the membership test below is an exact
  # match on a whole ID rather than a substring match.
  dup_lookup=$'\n'"${dup_ids}"$'\n'

  for doc in "${filtered[@]}"; do
    i=1
    # Columns 1 and 3 of each tab-separated record: read ID and reference.
    while IFS=$'\t' read -r seq_id _ ref_id _; do
      printf '%s\n' "$i"   # progress: record number within this file
      i=$(( i + 1 ))
      [[ -n "$seq_id" ]] || continue

      if [[ "$dup_lookup" == *$'\n'"${seq_id}"$'\n'* ]]; then
        printf '%s\n' "$ref_id" > "${DUP}${seq_id}.txt"
      else
        printf '%s\n' "$ref_id" > "${seq_id}.txt"
      fi
    done < "$doc"
  done
}

#######################################
# Entry point.
# Arguments: $1 - optional working directory (default: DEFAULT_WORK_DIR)
#######################################
main() {
  local work_dir=${1:-$DEFAULT_WORK_DIR}

  require_tools seqkit minimap2 samtools
  cd "$work_dir" || die "cannot cd to $work_dir"

  filter_and_count_reads
  map_reads

  cd Mapped || die "cannot cd to $work_dir/Mapped"
  filter_alignments

  cd Filtered || die "cannot cd to $work_dir/Mapped/Filtered"
  write_read_assignments

  echo "Alignment completed."
}

main "$@"