Beloniformes-Opsin / Scripts / consensus.py
consensus.py
Raw
#consensus.py

import sys

#rev_comp = {'A':'T', 'T':'A', 'C':'G', 'G':'C'}
seqs = {}
pileup_file = sys.argv[1]
all_lines = []

for line in open(pileup_file):
	all_lines.append(line.split())
#print(all_lines)
for i in all_lines:
	ID,pos,ref,num_reads,bases,quals,check = i
	if ID not in seqs:
		seqs[ID] = ''
	if num_reads == 0:
		seqs[ID] += "-"
	else:
		counts = {'A':0, 'C':0, 'G':0, 'T':0, "N":0}
		for c in bases:
			if c in {'.',','}:
				counts[ref] += 1
			elif c in {'A', 'C', 'G', 'T', 'N'}:
				counts[c] += 1
			elif c in {'a', 'c', 'g', 't'}:
				counts[c.upper()] += 1
		if sum(counts.values()) == 0:
			seqs[ID] += "-"
		else:
			seqs[ID] += max(counts, key=lambda key: counts[key])


species = seqs.keys()[0]
sequence = seqs.values()[0]

with open(pileup_file.split(".mpileup")[0]+"_consensus.fa", "w") as outfile:
	outfile.write(">" + species + "\n")
	outfile.write(sequence)