"""
Mass list of aminoacids, protenimodifications, glycan monomers, etc.
Contains also all functions for mass calculation.
Source of mass values from:
http://web.expasy.org/findmod/findmod_masses.html
"""
#我需要知道那些支链也碎的mz,所以我需要知道怎么表明立体结构,以糖所处的位置为糖峰标号,新糖峰标记,比如以前的BY无法标支链碎裂峰,现在以糖来做下标
#pglyco结构转换为图结构
#可以做ficharge到percursor charge,丢水,丢NH3, ?三电荷的碎片有吗,我有看过,那还是保留吧 谱图预测可以快速匹配大量碎片
#对于糖来说,如果对称糖结构,那么不同的来源会有相同的mz,出现在一个峰上,可能只能强度平分
#所有的函数都汇聚在pepfragmass()函数了,可以只用那个函数
#HCD 碎裂,by离子会把糖也全部丢掉,而且只碎糖的时候也会碎裂多根键 找一下HCD碎裂的诊断离子表
#所有可能的糖结构,需要能做成可以理解的模式。
# 。其实还是需要考虑一下,为什么要做谱图搜索,对于算法而言,糖产生一些细小的结构改变,只是去做一些分析。但是对谱图预测而言,需要重新去预测。那我现在最多只能拿已有的糖去做谱图预测。
#IgG每个位点有的糖就那么几十种,实在不行可以按照那个来。
#糖的峰要慢慢check,注意一下mz
#单糖可以通过多步反应,掉中间那个,可以不预测,只作为诊断离子,HCD模式如何计算碎裂多次
#用ETD模式,CID模式,和HCD模式,依次测试
#可以结合sequence searching 和spectral searching,先匹配预测的mz,再匹配预测的强度
#搜索做断两次+单糖+丢Fuc,预测做丢一次+丢Fuc
#有不同的碎裂模式,不同的碎裂模式要用相应的谱图验证
import re
import copy
import dgl
import torch
# --------------------------- Other masses ----------------------------#
# http://www.sisweb.com/referenc/source/exactmas.htm
MASS = {}
MASS["H2O"] = 18.01057
MASS["NH3"] = 17.02655
MASS["H+"] = 1.00728
MASS["H"] = 1.007825
MASS["C"] = 12.0
MASS["N"] = 14.003074
MASS["O"] = 15.994915
MASS["S"] = 31.972072
MASS["P"] = 30.973763
MASS["Na"] = 22.989770
MASS["K"] = 38.963708
MASS["Mg"] = 23.985045
# --------------------------- Aminoacids ------------------------------#
AMINOACID = {}
AMINOACID["A"] = 71.03711
AMINOACID["R"] = 156.10111
AMINOACID["N"] = 114.04293
AMINOACID["J"] = 114.04293 #带糖的N
AMINOACID["D"] = 115.02694
AMINOACID["C"] = 103.00919
AMINOACID["E"] = 129.04259
AMINOACID["Q"] = 128.05858
AMINOACID["G"] = 57.02146
AMINOACID["H"] = 137.05891
AMINOACID["I"] = 113.08406
AMINOACID["L"] = 113.08406
AMINOACID["K"] = 128.09496
AMINOACID["M"] = 131.04049
AMINOACID["F"] = 147.06841
AMINOACID["P"] = 97.05276
AMINOACID["S"] = 87.03203
AMINOACID["T"] = 101.04768
AMINOACID["W"] = 186.07931
AMINOACID["Y"] = 163.06333
AMINOACID["V"] = 99.06841
# ------------------------------ Glycans ------------------------------#
#Glycan的列表,Monoisotopic mass,库中的所有修饰保持大写,输入的大小写都可以兼容,因为可以转换
GLYCAN = {}
GLYCAN["DHEX"] = 146.0579
GLYCAN["HEX"] = 162.0528
GLYCAN["HEXNAC"] = 203.0794
GLYCAN["NEUAC"] = 291.0954
GLYCAN["NEUGC"] = 307.0903
GLYCAN["FUC"] = 146.0579
# --------------------------- Compositions ----------------------------#
# http://www.webqc.org/aminoacids.php
COMPOSITION = {}
COMPOSITION['A'] = {'C': 3, 'H': 7, 'N': 1, 'O': 2}
COMPOSITION['C'] = {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 1}
COMPOSITION['D'] = {'C': 4, 'H': 7, 'N': 1, 'O': 4}
COMPOSITION['E'] = {'C': 5, 'H': 9, 'N': 1, 'O': 4}
COMPOSITION['F'] = {'C': 9, 'H': 11, 'N': 1, 'O': 2}
COMPOSITION['G'] = {'C': 2, 'H': 5, 'N': 1, 'O': 2}
COMPOSITION['H'] = {'C': 6, 'H': 9, 'N': 3, 'O': 2}
COMPOSITION['I'] = {'C': 6, 'H': 13, 'N': 1, 'O': 2}
COMPOSITION['K'] = {'C': 6, 'H': 14, 'N': 2, 'O': 2}
COMPOSITION['L'] = {'C': 6, 'H': 13, 'N': 1, 'O': 2}
COMPOSITION['M'] = {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 1}
COMPOSITION['N'] = {'C': 4, 'H': 8, 'N': 2, 'O': 3}
COMPOSITION['J'] = {'C': 4, 'H': 8, 'N': 2, 'O': 3}
COMPOSITION['P'] = {'C': 5, 'H': 9, 'N': 1, 'O': 2}
COMPOSITION['Q'] = {'C': 5, 'H': 10, 'N': 2, 'O': 3}
COMPOSITION['R'] = {'C': 6, 'H': 14, 'N': 4, 'O': 2}
COMPOSITION['S'] = {'C': 3, 'H': 7, 'N': 1, 'O': 3}
COMPOSITION['T'] = {'C': 4, 'H': 9, 'N': 1, 'O': 3}
COMPOSITION['V'] = {'C': 5, 'H': 11, 'N': 1, 'O': 2}
COMPOSITION['W'] = {'C': 11, 'H': 12, 'N': 2, 'O': 2}
COMPOSITION['Y'] = {'C': 9, 'H': 11, 'N': 1, 'O': 3}
COMPOSITION["HEX"] = {'C': 6, 'H': 12, 'N': 0, 'O': 6}
COMPOSITION["DHEX"] = {'C': 6, 'H': 12, 'N': 0, 'O': 5}
COMPOSITION["HEXNAC"] = {'C': 8, 'H': 15, 'N': 1, 'O': 6}
COMPOSITION["NEUAC"] = {'C': 11, 'H': 19, 'N': 1, 'O': 9}
COMPOSITION["NEUGC"] = {'C': 11, 'H': 19, 'N': 1, 'O': 10}
COMPOSITION["H2O"] = {'H': 2, 'O': 1}
# --------------------------- Proteinmodifications---------------------#
PROTEINMODIFICATION = {}
PROTEINMODIFICATION["ACE"] = {"mass": 42.0106,
"composition":{'C': 2, 'H': 2, 'O': 1}
} # Acetylation
PROTEINMODIFICATION["AMI"] = {"mass": -0.9840,
"targets": {"CTERM"},
"composition":{'H': 1, 'O': -1, 'N': 1}
} # Amidation
# Cys_CAM Idoacetamide treatment (carbamidation)
PROTEINMODIFICATION["CAR"] = {"mass": 57.021464,
"targets": {"C","NTERM"},
"composition":{'C':2, 'H':3, 'O':1, 'N':1}
}
# Cys_CM, Iodoacetic acid treatment (carboxylation)
PROTEINMODIFICATION["CM"] = {"mass": 58.005479,
"targets": {"C"},
"composition":{'C': 2, 'H': 2, 'O': 2}
}
PROTEINMODIFICATION["DEA"] = {"mass": 0.9840,
"targets": {"N", "Q"},
"composition":{'H': -1, 'O': 1, 'N': -1}
} # Deamidation
PROTEINMODIFICATION["DEH"] = {"mass": -18.0106,
"targets": {"S", "T"},
"composition":{'H': -2, 'O': -1}
} # Dehydration Serine
PROTEINMODIFICATION["GUA"] = {"mass": 42.0218,
"targets": {"K"},
"composition":{'C': 1, 'H': 2,'N': 2}
} # K Guandination
PROTEINMODIFICATION["HYD"] = {"mass": 15.9949,
"composition":{'O': 1}
} # Hydroxylation
PROTEINMODIFICATION["MET"] = {"mass": 14.0157,
"composition":{'C': 1, 'H': 2}
} # Methylation
PROTEINMODIFICATION["OXI"] = {"mass": 15.9949,
"targets": {"M"},
"composition":{'O': 1}
} # MSO
PROTEINMODIFICATION["GLY"] = {"mass":0.0 ,
"targets": {"N", "S", "T"},
"composition":{}
}
#按照后修饰和氨基酸对PROTEINMODIFICATION里的后修饰进行排序
def getModificationNames():
""" Return a sorted list of Modification names.
Only contains modifications with targets declaration"""
def sort_names(name):
if "_" in name:
amino, mod = name.split("_")
return mod, amino
return name, " "
names = set()
for key in PROTEINMODIFICATION:
if "targets" in PROTEINMODIFICATION[key]:
names.add(key)
return sorted(names, key=sort_names)
# print("Modifications in list",getModificationNames())
# ------------------------------- Glycan graph ---------------------------#
#表示糖的立体结构,通过DFS计算碎裂一条边以后,剩下的糖部分
node2dict={0:"peptide",1:"HexNAc",2:"Hex",3:"Fuc",4:"NeuAc",5:"NeuGc"}
node2char={"P":"peptide","H" : "Hex", "N" : "HexNAc", "A" : "NeuAc", "G" : "NeuGc" , "F" :"Fuc"}
node2idx={"P":0,"H" : 2, "N" : 1, "A" : 4, "G" : 5 , "F" :3}
def peptide_process(peptide):
"""Create dict for input sequende.
Input:LSVECAJK_5.Car._6_2_(N(F)(N(H(H)(N)(H(N)))))
Output:{'sequence': 'LSVECAJK', 'charge': '2', 'modifications': [{'name': '(Car)1', 'amino': 'C', 'position': 4, 'type': 'mod'},
{'name': '(Hex)3(HexNAc)4(Fuc)1', 'amino': 'J', 'position': 6, 'structure': '(N(F)(N(H(H)(N)(H(N)))))', 'type': 'glyco'}]}
"""
a=peptide.split("_")
sequence=a[0]
# print(sequence)
charge=a[3]
mod_dict={}
mod_dictg={}
mod_list=[]
if a[1] !="None":
mod=a[1].rstrip(".").split(".")
# print(mod)
for i in range(0,len(mod),2):
mod_dict=copy.deepcopy(mod_dict)
mod_dict["name"]="("+mod[i+1]+")1"
mod_dict["amino"]=sequence[int(mod[i])-1]
mod_dict["position"]=int(mod[i])-1
mod_dict["type"]="mod"
mod_list.append(mod_dict)
# print("mod_list",mod_list)
glyco=a[4]
# print("glyco",glyco)
position=int(a[2])
composition={"H":str(glyco.count("H")),"N":str(glyco.count("N")),
"A":str(glyco.count("A")),"G":str(glyco.count("G")),
"F":str(glyco.count("F"))}
comp_str=""
for k in list(composition.keys()):
if composition[k]!="0":
comp_str+="("+node2char[k]+")"
comp_str+=str(composition[k])
mod_dictg["name"]=comp_str
mod_dictg["amino"]=peptide[position]
# print(mod_dictg["amino"])
mod_dictg["position"]=position
mod_dictg["structure"]=glyco
mod_dictg["type"]="glyco"
mod_list.append(mod_dictg)
peptide_dict={"sequence":sequence,"charge":charge,"modifications":mod_list}
# print(peptide)
# print(peptide_dict)
return peptide_dict
def glyco_process_0(glyco:str):
glyco_ind=re.sub("\w","",glyco)
glyco_cha=re.sub("\W","",glyco)
# ipdb.set_trace()
if len(glyco_ind) != 2*len(glyco_cha):
print("alert:len(glyco_ind) != 2*len(glyco_cha)")
raise AssertionError
node1_index=-1
node2_index=0
node1="P"
node_group=[]
# ipdb.set_trace()
for i in range(len(glyco_ind)):
if glyco_ind[i] =="(":
node2=glyco_cha[node2_index]
node1_index+=1
node2_index+=1
node=[node1_index,node2_index]
node_group.append(node)
# ipdb.set_trace()
# print(node_group)
# print("(",node1_index)
# print("(",node2_index)
node1=node2
node1_index=node2_index-1
if glyco_ind[i]==")":
node1_index-=1
node1=glyco_cha[node1_index]
# print(")",node1_index)
# ipdb.set_trace()
return node1_index,node2_index,node_group,glyco_cha
def glyco_process(glyco:str):
glyco_ind=re.sub("\w","",glyco)
glyco_cha=re.sub("\W","",glyco)
# ipdb.set_trace()
if len(glyco_ind) != 2*len(glyco_cha):
print("alert:len(glyco_ind) != 2*len(glyco_cha)")
raise AssertionError
##双指针,同时需要dict指明str的index和node index的关系
node1_ptr=0
node2_ptr=0
node1=0##多余的起点
indict={-1:0,}
node_count=0
edge_index=[]
stack=[-2]
# ipdb.set_trace()
while node2_ptr < len(glyco):
if glyco[node2_ptr]=="(":
# print("(")
node1_ptr=stack[-1]
stack.append(node2_ptr)
elif glyco[node2_ptr]==")":
# print(")")
node1_ptr=stack.pop()
else:##碰到字母
# print("add")
node_count+=1
indict[node2_ptr]=node_count
edge_index.append([indict[node1_ptr+1],indict[node2_ptr]])
# print(node2_ptr)
# ipdb.set_trace()
node2_ptr+=1
# print(edge_index)
return node1_ptr,node2_ptr,edge_index,glyco_cha
# print(glyco_process("(N(N(H(H)(H(H))))(F))"))
# glyco_process_0("(N(N(H(H)(H(H))))(F))")
# glyco_process_0("(N(F)(N(H(H(N(H)))(H(N(H(G)))))))")
# glyco_process_0("(N(N(H(H(H(H)))(H(H(H(H)))(H(H(H(H))))))))")
# glyco_process_0("(N(N(H(H(N(H(A)))(N(H)))(H(H))))(F))")
# glyco_process("(N(N(H(H(N(H(A)))(N(H)))(H(H))))(F))")
# ipdb.set_trace()
# global diff
# diff=0
# def glycoprocess_diff(glyco):
# glyco=glyco["H,N,A,G,F"]
# graphedge_0=glyco_process_0(glyco)[2]
# graphedge_1=glyco_process(glyco)[2]
# if graphedge_0!= graphedge_1:
# # print(glyco)
# # print(graphedge_1)
# # print(graphedge_0)
# global diff
# diff+=1
# # print(diff)
# # ipdb.set_trace()
# return diff
# # ipdb.set_trace()
# import pandas as pd
# mouse_gdb=pd.read_csv("/remote-home/yxwang/test/zzb/DeepGlyco/DeepSweet_v1/code/task_processing/AHGF/pGlyco-N-Mouse.gdb",sep="\t")
# mouse_gdb.apply(glycoprocess_diff,axis=1)
# ipdb.set_trace()
def GlycanFrag_struc(glyco_graph):
edge_index=glyco_graph[2]
# print("edge_index",edge_index)
#eg.[[0, 1], [1, 2], [1, 3], [3, 4], [4, 5], [4, 6], [6, 7], [7, 8], [5, 9], [9, 10], [10, 11]]
nodef="P"+glyco_graph[3]
nodef=[node2idx[i] for i in nodef]
# print("nodef",nodef)
#eg. nodef [0, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 3]
g=dgl.graph(edge_index)
g.ndata["mononer"]=torch.Tensor(nodef).to(int)
return g
def GlycanFrag(glyco_graph):
edge_index=glyco_graph[2]
# print("edge_index",edge_index)
#eg.[[0, 1], [1, 2], [1, 3], [3, 4], [4, 5], [4, 6], [6, 7], [7, 8], [5, 9], [9, 10], [10, 11]]
nodef="P"+glyco_graph[3]
nodef=[node2idx[i] for i in nodef]
# print("nodef",nodef)
#eg. nodef [0, 1, 3, 1, 2, 1, 2, 1, 2, 2, 1, 3]
g=dgl.graph(edge_index)
g.ndata["mononer"]=torch.Tensor(nodef).to(int)
glycanfrag={}
for eid in range(len(g.edges()[0])):
graphe=dgl.remove_edges(g,eid)
removed_edge=g.edges()[0][eid].item(),g.edges()[1][eid].item()
#c从这里可以看出,removed_edge是按照eid的顺序从0到len(g.edges()[0])从图上裂掉的。所以removed_edge的顺序就是eid的顺序
eid_res=list(dgl.dfs_edges_generator(graphe,0))
noderes=[0]
graphe_edges=graphe.edges()
for eid2 in eid_res:
node1,node2=graphe_edges[0][eid2],graphe_edges[1][eid2]
noderes.append(node1.item())
noderes.append(node2.item())
noderes=list(set(noderes))
res_monomer=g.ndata["mononer"][noderes].numpy().tolist()
res_sugar=[node2dict[i] for i in res_monomer if i != 0]
# print("removed_edge:",removed_edge,"res_index:",res_monomer,"res_sugar:",res_sugar)
glycanfrag[removed_edge]=res_sugar
return glycanfrag
def GlycanFrag_HCD(glyco_graph):
edge_index=glyco_graph[2]
# print("edge_index",edge_index)
nodef="P"+glyco_graph[3]
nodef=[node2idx[i] for i in nodef]
# print("nodef",nodef)
g=dgl.graph(edge_index)
g.ndata["mononer"]=torch.Tensor(nodef).to(int)
glycanfrag={}
# print("g.edges",g.edges())
for eid in range(len(g.edges()[0])):
for eid1 in range(eid,len(g.edges()[0])):
graphe=dgl.remove_edges(g,[eid,eid1])
removed_edge1=g.edges()[0][eid].item(),g.edges()[1][eid].item()
removed_edge2=g.edges()[0][eid1].item(),g.edges()[1][eid1].item()
removed_edge=[removed_edge1,removed_edge2]
# ------------------- turn to bidirectional -------------------
u , v = graphe.edges(order = "eid")
graphe.add_edges(v , u) # bidirect
# print("removed_edge_list",removed_edge)
three_splits=[]
for dfs_initial_node in [removed_edge1[0],removed_edge1[1],removed_edge2[1]]:
# print("dfs_initial_node:",dfs_initial_node)
noderes=[dfs_initial_node]
eid_res=list(dgl.dfs_edges_generator(graphe,dfs_initial_node))
#很奇怪,这里可以接受0,但是不能接受removed_edge1[0] removed_edge1[1] removed_edge2[2]
# print("removed_edge1[0]",removed_edge1[0])
# print("eid_res",eid_res)
graphe_edges=graphe.edges()
# print("graphe_edges",graphe_edges)
for eid2 in eid_res:
# print("eid_res_eid2",eid2)
node1,node2=graphe_edges[0][eid2],graphe_edges[1][eid2]
noderes.append(node1.item())
noderes.append(node2.item())
noderes=list(set(noderes))
# ipdb.set_trace()
res_monomer=g.ndata["mononer"][noderes].numpy().tolist()
res_sugar=[node2dict[i] for i in res_monomer if i != 0]
three_splits.append( {"res_index":res_monomer,"res_sugar":res_sugar})
# print("removed_edge:",removed_edge,"three_splits:", three_splits)
removed_str=""
for n in removed_edge:
for n1 in n:
removed_str+=str(n1)
glycanfrag[removed_str]=three_splits
return glycanfrag
# import ipdb
# ipdb.set_trace()
# glyco_graph=glyco_process("(N(F)(N(H(H)(H))))")
# glycanfrag=GlycanFrag_HCD(glyco_graph)
# print(glycanfrag)
#对得到的糖碎片计算mz,算好了放到最后一个函数,做整合
# ------------------------------- Functions ---------------------------#
#计算肽段部分质量
def calcPeptideMass(peptide):
mass = 0
for s in peptide["sequence"]:
try:
mass += AMINOACID[s]
except KeyError:
print("Unknown Aminoacid '"+s+"'!")
raise
return mass
# print("Peptide",peptide)
# peptidemass=calcPeptideMass(peptide)
# print("Mass for peptide without modifications",peptidemass)
#计算后修饰的总质量
def calcModificationMass(peptide):
mass = 0
if peptide["modifications"] is None:
return mass
if len(peptide["modifications"]) ==0:
return mass
# TODO: Checks for modification consistency
# a) amount of amino acids must be in sequence
# b) a given position (except -1) can only be
for mod in peptide["modifications"]:
name=mod["name"]
for part in re.findall(r"\(.+?\)-?\d+", name.upper()):
monomer, amount = part.split(")")
monomer = monomer[1:]
amount = int(amount)
if monomer in GLYCAN:
mass += GLYCAN[monomer]*amount
elif monomer in MASS:
mass += MASS[monomer]*amount
elif monomer in PROTEINMODIFICATION:
mass += PROTEINMODIFICATION[monomer]["mass"]*amount
else:
raise Exception("cannot find monomer {} in {}".format(monomer, name))
return mass
# modificationmass=calcModificationMass(peptide)
# print("mass for modifications",modificationmass)
#算修饰的mz ver:算糖基质量,可以算加和物质
def calcModpepMass(peptide):
peptidemass = calcPeptideMass(peptide)
modificationmass = calcModificationMass(peptide)
ModpepMass=peptidemass+modificationmass
return ModpepMass
# ModpepMass=calcModpepMass(peptide)
# print("Mass for peptide with modifications",ModpepMass)
def pepfragmass(input,mode,maxcharge=3):
peptide=peptide_process(input)
# print("peptide",peptide)
for modification in peptide["modifications"]:
name=modification["name"].replace(" ", "")
if re.match(r"^(\(.+?\)-?\d+)+$", name) == None:
print("name",name)
raise Exception(r"""Input string '{}' doesn't follow the
regex '^(\(.+?\)-?\d+)+$'
Please surrond monomers by brackets followed by the amount of
the monomer, e.g. '(NeuAc)1(H2O)-1(H+)1'""".format(name))
sequence=peptide["sequence"]
length=len(sequence)
FragCharge=list(range(1,min((int(peptide["charge"])+1),maxcharge)))#FragCharge最大值调小了
b_ions=[]
y_ions=[]
B_ions=[]
Y_ions=[]
# 计算b/y离子
#检查一下糖的碎片模式,碎片电荷以及中性丢失的情况
#M+H-H2O,M+H, M+2H-H2O, M+2H ...
if "ETD" in mode: #ETD模式意味着,随着位置糖全部保留,然后碎裂肽段,ETD是cz碎裂注意一下
ModpepMass = calcModpepMass(peptide)+MASS["H2O"]
for FrgNumC in range(1,len(sequence)):
Frgmodification=[]
for modification in peptide["modifications"]:
if modification["position"]<=FrgNumC-1:
Frgmodification.append(modification)
Fragpepb=sequence[:FrgNumC]
Fragb={"sequence":Fragpepb,"modifications":Frgmodification}
#mass = MASS["H2O"] check一下 percursor, b/y ions, B/Y ions的质量
FragbMass = calcModpepMass(Fragb)+MASS["N"]+MASS["H"]
FragyMass=ModpepMass-FragbMass
for ficharge in FragCharge:
for loss_type in ["noloss","loss_H2O","loss_NH3"]:
if loss_type=="noloss":
FragTypeb= "c"+str(FrgNumC)+"_"+str(ficharge)
bion={FragTypeb:FragbMass/ficharge+MASS["H+"]}
b_ions.append(bion)
# print(b_ions)
FragTypey = "z" + str(len(sequence)-FrgNumC) + "_" + str(ficharge)
yion = {FragTypey: FragyMass / ficharge+MASS["H+"]}
y_ions.append(yion)
if loss_type=="loss_H2O":
FragTypeb= "c"+str(FrgNumC)+"_"+str(ficharge)+"_loss_H2O"
bion={FragTypeb:(FragbMass-MASS["H2O"])/ficharge+MASS["H+"]}
b_ions.append(bion)
# print(b_ions)
FragTypey = "z" + str(len(sequence)-FrgNumC) + "_" + str(ficharge)+"_loss_H2O"
yion = {FragTypey: (FragyMass-MASS["H2O"]) / ficharge+MASS["H+"]}
y_ions.append(yion)
if loss_type=="loss_NH3":
FragTypeb= "c"+str(FrgNumC)+"_"+str(ficharge)+"_loss_NH3"
bion={FragTypeb:(FragbMass-MASS["NH3"])/ficharge+MASS["H+"]}
b_ions.append(bion)
# print(b_ions)
FragTypey = "z" + str(len(sequence)-FrgNumC) + "_" + str(ficharge)+"_loss_NH3"
yion = {FragTypey: (FragyMass-MASS["NH3"]) / ficharge+MASS["H+"]}
y_ions.append(yion)
#计算B/Y ions
if "HCD_1" in mode: #HCD_1 mode意味着肽段全保留,糖部分碎一刀,另外如果有Fuc可以丢Fuc
ModpepMass = calcModpepMass(peptide)+MASS["H2O"]
for i in peptide["modifications"]:
if i["type"]=="glyco":
structure=i["structure"]
glyco_graph=glyco_process(structure)
# import ipdb
# ipdb.set_trace()
glycanfrag=GlycanFrag(glyco_graph)
keys=list(glycanfrag.keys())
values=list(glycanfrag.values())
# print(values)
# print(keys)
peptide_copy=copy.deepcopy(peptide)
# import ipdb
# ipdb.set_trace()
# print("FragY",peptide)
# print("FragY2",peptide_copy)
for k in range(len(keys)):
#将k认为是eid的列表,可以考虑是不是对
Glycan=values[k]
# print("Glycan",Glycan)
composition={"H":Glycan.count("Hex"),"N":Glycan.count("HexNAc"),
"A":Glycan.count("NeuAc"),"G":Glycan.count("NeuGc"),"F":Glycan.count("Fuc")}
comp_str=""
for m in list(composition.keys()):
if composition[m]!=0:
comp_str+="("+node2char[m]+")"
comp_str+=str(composition[m])
i["name"]=comp_str #这里改变了peptide,存了peptide的原始备份peptide_copy
# print("comp_str",comp_str)
FragYMass = calcModpepMass(peptide)+MASS["H2O"] #h2o需要核实
# print(FragYMass)
FragBMass=ModpepMass-FragYMass
for ficharge in FragCharge:
for loss_type in ["noloss","loss_H2O","loss_NH3","loss_FUC"]:
if loss_type=="noloss":
FragTypeY = "Y" + str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_"+ str(ficharge)
Yion = {FragTypeY: round(FragYMass / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
FragTypeB= "B"+ str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)
Bion = {FragTypeB: round(FragBMass / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if loss_type=="loss_H2O":
FragTypeY= "Y" + str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)+"_loss_H2O"
Yion={FragTypeY: round((FragYMass-MASS["H2O"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
FragTypeB = "B"+ str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)+"_loss_H2O"
Bion = {FragTypeB: round((FragBMass-MASS["H2O"]) / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if loss_type=="loss_NH3":
FragTypeY= "Y" + str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)+"_loss_NH3"
Yion={FragTypeY: round((FragYMass-MASS["NH3"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
FragTypeB = "B"+ str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)+"_loss_NH3"
Bion = {FragTypeB: round((FragBMass-MASS["NH3"]) / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if len(Glycan)>1 and Glycan[1]== "Fuc" and loss_type=="loss_FUC":
FragTypeY= "Y" + str(keys[k][0]) + str(keys[k][1])+"_" + str(k) + "_" + str(ficharge)+"_loss_FUC"
Yion={FragTypeY: round((FragYMass-GLYCAN["FUC"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
peptide=peptide_copy
# print(Y_ions)
if "HCD_by" in mode:
#HCD的b,y碎片需要脱糖,或者有一个HexNac
copy_num=0
for gly_HCD in ["(HexNAc)0","(HexNAc)1"]:
for modification in peptide["modifications"]:
if modification["type"]=="glyco":
if copy_num==0:
glyco_mod=copy.deepcopy(modification["name"])
glysite=modification["position"]
# print("glyco_mod",glyco_mod)
copy_num+=1
# assert modification["structure"][1] =="N"
modification["name"]=gly_HCD
glyco_aftermod=copy.deepcopy(modification)
ModpepMass = calcModpepMass(peptide)+MASS["H2O"]
for FrgNumC in range(1,len(sequence)):
Frgmodification=[]
for modification in peptide["modifications"]:
if modification["position"]<=FrgNumC-1:
Frgmodification.append(modification)
Fragpepb=sequence[:FrgNumC]
Fragb={"sequence":Fragpepb,"modifications":Frgmodification}
#mass = MASS["H2O"] check一下 percursor, b/y ions, B/Y ions的质量
FragbMass = calcModpepMass(Fragb)
FragyMass=ModpepMass-FragbMass
for ficharge in FragCharge:
for loss_type in ["noloss","loss_H2O","loss_NH3"]:
if loss_type=="noloss":
FragTypeb= "b"+str(FrgNumC)+"_"+str(ficharge)+"_"+gly_HCD
if FragTypeb.startswith("b") and int(FrgNumC)<=int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
bion={FragTypeb:round(FragbMass/ficharge+MASS["H+"],5)}
b_ions.append(bion)
# print(b_ions)
# import ipdb
# ipdb.set_trace()
FragTypey = "y" + str(length-FrgNumC) + "_" + str(ficharge)+"_"+gly_HCD
if FragTypey.startswith("y") and FrgNumC>int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
yion = {FragTypey: round(FragyMass / ficharge+MASS["H+"],5)}
y_ions.append(yion)
if loss_type=="loss_H2O":
FragTypeb= "b"+str(FrgNumC)+"_"+str(ficharge)+"_loss_H2O_"+gly_HCD
if FragTypeb.startswith("b") and int(FrgNumC)<=int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
bion={FragTypeb:round((FragbMass-MASS["H2O"])/ficharge+MASS["H+"],5)}
b_ions.append(bion)
# print(b_ions)
FragTypey = "y" + str(len(sequence)-FrgNumC) + "_" + str(ficharge)+"_loss_H2O_"+gly_HCD
if FragTypey.startswith("y") and FrgNumC>int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
yion = {FragTypey: round((FragyMass-MASS["H2O"]) / ficharge+MASS["H+"],5)}
y_ions.append(yion)
if loss_type=="loss_NH3":
FragTypeb= "b"+str(FrgNumC)+"_"+str(ficharge)+"_loss_NH3_"+gly_HCD
if FragTypeb.startswith("b") and int(FrgNumC)<=int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
bion={FragTypeb:round((FragbMass-MASS["NH3"])/ficharge+MASS["H+"],5)}
b_ions.append(bion)
# print(b_ions)
FragTypey = "y" + str(len(sequence)-FrgNumC) + "_" + str(ficharge)+"_loss_NH3_"+gly_HCD
if FragTypey.startswith("y") and FrgNumC>int(glysite) and gly_HCD=="(HexNAc)1":
pass
else:
yion = {FragTypey: round((FragyMass-MASS["NH3"]) / ficharge+MASS["H+"],5)}
y_ions.append(yion)
#HCD_1 mode意味着肽段全保留,糖部分碎一刀,另外如果有Fuc可以丢Fuc
#B/Y ions糖可以碎裂两刀或者一刀,虽然可以碎三次以上,但是碎太多,搜索空间过大,而且不具有结构区分性
#另外加上单糖
#有很多m/z重复的,需要去重
if "HCD_BY_2" in mode:
ModpepMass = calcModpepMass(peptide)+MASS["H2O"]
for i in peptide["modifications"]:
if i["type"]=="glyco":
structure=i["structure"]
# ipdb.set_trace()
p = re.compile(r'[()](.*?)[)]', re.S)
# ipdb.set_trace()
glycan_all=re.findall(p,i["name"])
# print("glycan_all",glycan_all)
# ipdb.set_trace()
# FragCharge=list(range(1,min((int(peptide["charge"])+1),4)))
for mono_sugar in glycan_all:
for ficharge in FragCharge:
for addition_type in ["","_add_H2O","_loss_H2O"]:
FragTypeB = "B_" + str(ficharge)+"_"+mono_sugar+addition_type
if addition_type =="":
FragBMass=GLYCAN[mono_sugar.upper()]
if addition_type =="_add_H2O":
FragBMass=GLYCAN[mono_sugar.upper()]+MASS["H2O"]
if addition_type =="_loss_H2O":
FragBMass=GLYCAN[mono_sugar.upper()]-MASS["H2O"]
Bion = {FragTypeB: FragBMass / ficharge+MASS["H+"]}
B_ions.append(Bion)
# ipdb.set_trace()
glyco_graph=glyco_process(structure)
glycanfrag=GlycanFrag_HCD(glyco_graph)
keys=list(glycanfrag.keys())
values=list(glycanfrag.values())
# print(values)
# print(keys)
# ipdb.set_trace()
peptide_copy=copy.deepcopy(peptide)
for k in range(len(keys)):
Glycan=values[k]
# print("Glycan",Glycan)
# print("bond",keys[k])
# ipdb.set_trace()
frg_num=0
for res in Glycan:
res_index=res["res_index"]
if 0 in res_index: #Y_ions
frg_num+=1
FragNumY=keys[k]+"_"+str(frg_num)
# print("FragNumY",FragNumY)
res_sugar=res["res_sugar"]
# print("res_sugar",res_sugar)
composition={"H":res_sugar.count("Hex"),"N":res_sugar.count("HexNAc"),
"A":res_sugar.count("NeuAc"),"G":res_sugar.count("NeuGc"),"F":res_sugar.count("Fuc")}
comp_str=""
for m in list(composition.keys()):
if composition[m]!=0:
comp_str+="("+node2char[m]+")"
comp_str+=str(composition[m])
i["name"]=comp_str #这里改变了peptide,存了peptide的原始备份peptide_copy
# ipdb.set_trace()
# print("comp_str",comp_str)
FragYMass = calcModpepMass(peptide)+MASS["H2O"] #h2o需要核实
# print(FragYMass)
for ficharge in FragCharge:
for loss_type in ["noloss","loss_H2O","loss_NH3","loss_FUC"]:
if loss_type=="noloss":
FragTypeY = "Y" + FragNumY+"_" + str(ficharge)
Yion = {FragTypeY: round(FragYMass / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
if loss_type=="loss_H2O":
FragTypeY= "Y" + FragNumY+"_" + str(ficharge)+"_loss_H2O"
Yion={FragTypeY: round((FragYMass-MASS["H2O"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
if loss_type=="loss_NH3":
FragTypeY= "Y" + FragNumY+"_" + str(ficharge)+"_loss_NH3"
Yion={FragTypeY: round((FragYMass-MASS["NH3"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
if len(res_sugar)>1 and res_sugar[1]== "Fuc" and loss_type=="loss_FUC":
FragTypeY= "Y" + FragNumY+"_" + str(ficharge) + "_" + str(ficharge)+"_loss_FUC"
Yion={FragTypeY: round((FragYMass-GLYCAN["FUC"]) / ficharge+MASS["H+"],5)}
Y_ions.append(Yion)
else:
frg_num+=1
FragNumB=keys[k]+"_"+str(frg_num)
# print(FragNumB)
res_sugar=res["res_sugar"]
composition={"H":res_sugar.count("Hex"),"N":res_sugar.count("HexNAc"),
"A":res_sugar.count("NeuAc"),"G":res_sugar.count("NeuGc"),"F":res_sugar.count("Fuc")}
FragBMass=0
for m in list(composition.keys()):
if composition[m]!=0:
# print(node2char[m].upper())
FragBMass+=GLYCAN[node2char[m].upper()]*composition[m]
# print("FragBMass",FragBMass)
for ficharge in FragCharge:
for loss_type in ["noloss","loss_H2O","loss_NH3","loss_FUC"]:
if loss_type=="noloss":
FragTypeB= "B"+ FragNumB+"_" + str(ficharge)
Bion = {FragTypeB: round(FragBMass / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if loss_type=="loss_H2O":
FragTypeB = "B"+ FragNumB+"_" + str(ficharge)+"_loss_H2O"
Bion = {FragTypeB: round((FragBMass-MASS["H2O"]) / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if loss_type=="loss_NH3":
if "HexNAc" in res_sugar or "NEUAC" in res_sugar or "NEUGC" in res_sugar:
# print("res_sugar",res_sugar)
FragTypeB = "B"+ FragNumB+"_" + str(ficharge)+"_loss_NH3"
Bion = {FragTypeB: round((FragBMass-MASS["NH3"]) / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
if len(res_sugar)>1 and res_sugar[1]== "Fuc" and loss_type=="loss_FUC":
# print("res_sugar",res_sugar)
FragTypeB = "B"+ FragNumB+"_" + str(ficharge)+"_loss_FUC"
Bion = {FragTypeB: round((FragBMass-GLYCAN["FUC"]) / ficharge+MASS["H+"],5)}
B_ions.append(Bion)
peptide=peptide_copy
# print(Y_ions)
return b_ions,y_ions,B_ions,Y_ions
# input="AEAVGETLTLPGLVSADJGTYTCEAANK_23.Car._63002_3_(N(F)(N(H(H(H)(H))(H(N(H)(F))))))"
# input="EDGMLPAJR_None_7_3_(N(F)(N(H(H(H))(H))))"
# input="GGJGTICDNQR_7.Car._2_2_(N(F)(N(H(H(N))(H(N)(N)))))"
# mz_calc=pepfragmass(input,["HCD_by"])
# print(mz_calc)
# import ipdb
# ipdb.set_trace()
# print("Fragment MZ for modifided peptides",pepfragmass(peptide))