import re
from turtle import color
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
from lmfit.models import GaussianModel
import scipy.stats
from scipy.signal import savgol_filter
from statsmodels.nonparametric.kernel_regression import KernelReg
from scipy.signal import find_peaks
#from statsmodels.nonparametric.kernel_regression import kernelReg
liney=[]
linex=[]
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth
def ipddis(ipdcsv):
linex=[]
liney=[]
with open(ipdcsv, 'r') as f:
for line in f:
row = line.strip().split('\t')
liney.append(int(row[1]))
#print(row[0])
if re.findall(r'\((-?\d+.*),', row[0]):
seobj = re.findall(r'\((-?.*),', row[0])
linex.append(float(seobj[0]))
return(linex, liney)
def gaussian(x, am, sigma, mu):
y = am/(np.sqrt(2*np.pi*(sigma**2)))*np.exp(-0.5*(x-mu)**2/(sigma**2))
return y
def modelpre(dframe):
mf_rframe = dframe[dframe['x']>=1.6].reset_index()
print(mf_rframe)
modelr = GaussianModel()
params_r = modelr.guess(mf_rframe['y'], x=mf_rframe['x'])
result_r = modelr.fit(mf_rframe['y'], params_r, x=mf_rframe['x'])
sigma_r = result_r.best_values['sigma']
center_r = result_r.best_values['center']
amp_r = result_r.best_values['amplitude']
residen = abs(dframe[(dframe['x']<=center_r) & (dframe['x']>=0)]['y']-gaussian(dframe[(dframe['x']<=center_r) & (dframe['x']>=0)]['x'],amp_r,sigma_r,center_r)-gaussian(dframe[(dframe['x']<=center_r) & (dframe['x']>=0)]['x'],amp_r,sigma_r,center_r))
resi_cross = dframe['x'][residen.idxmin()]
#resi_cross = 2.15
crosspoint = abs(dframe[(dframe['x']<=2) & (dframe['x']>=0)]['y']-gaussian(dframe[(dframe['x']<=2) & (dframe['x']>=0)]['x'],amp_r,sigma_r,center_r))
cros2x = dframe['x'][crosspoint.idxmin()]
cros2x=2
fpr_t = sum(abs((dframe[(dframe['x']>=resi_cross) & (dframe['x']<=cros2x)]['y']-gaussian(dframe[(dframe['x']>=resi_cross) & (dframe['x']<=cros2x)]['x'],amp_r,sigma_r,center_r))))
nor_t = sum(dframe[dframe['x']>=resi_cross]['y'])
fnr = scipy.stats.norm(center_r,sigma_r).cdf(resi_cross)
return(resi_cross, cros2x, fnr, fpr_t, nor_t, amp_r, sigma_r, center_r)
def lmodelpre(dframe):
mf_rframe = dframe[dframe['x']<=0.5].reset_index()
#print(mf_rframe)
modelr = GaussianModel()
params_r = modelr.guess(mf_rframe['y'], x=mf_rframe['x'])
result_r = modelr.fit(mf_rframe['y'], params_r, x=mf_rframe['x'])
sigma_r = result_r.best_values['sigma']
center_r = result_r.best_values['center']
amp_r = result_r.best_values['amplitude']
#print(sigma_r, center_r, amp_r)
residen = abs(dframe[(dframe['x']>=center_r) & (dframe['x']<=1)]['y']-gaussian(dframe[(dframe['x']>=center_r) & (dframe['x']<=1)]['x'],amp_r,sigma_r,center_r)-gaussian(dframe[(dframe['x']>=center_r) & (dframe['x']<=1)]['x'],amp_r,sigma_r,center_r))
#print(residen)
resi_cross = dframe['x'][residen.idxmin()]
#print(resi_cross)
#resi_cross = 1.4854
crosspoint = abs(dframe[(dframe['x']>=-5) & (dframe['x']<=0)]['y']-gaussian(dframe[(dframe['x']>=-2) & (dframe['x']<=0)]['x'],amp_r,sigma_r,center_r))
cros2x = dframe['x'][crosspoint.idxmin()]
cros2x=-2
fpr_t = sum(abs((dframe[(dframe['x']<=resi_cross) & (dframe['x']>=cros2x)]['y']-gaussian(dframe[(dframe['x']<=resi_cross) & (dframe['x']>=cros2x)]['x'],amp_r,sigma_r,center_r))))
nor_t = sum(dframe[dframe['x']>=resi_cross]['y'])
fnr = 1 - scipy.stats.norm(center_r,sigma_r).cdf(resi_cross)
return(resi_cross, cros2x, fnr, fpr_t, nor_t, amp_r, sigma_r, center_r)
negx, negy = ipddis('./sb210_tetrahymena/Rep_2/allmacat_ipd.csv')
negymax = round(np.max(negy)*0.5)
#fig = plt.figure()
negdata = {'x':negx, 'y':negy}
negdata = pd.DataFrame(negdata)
#chrx, chry = ipddis('chrallipd.csv')
#chrymax = round(np.max(chry)*0.3)
#fig = plt.figure()
#chrdata = {'x':chrx, 'y':chry}
#chrdata = pd.DataFrame(chrdata)
posx, posy = ipddis('./allat_brduipd.csv')
posy_smooth = smooth(posy, 10)
ymax = round(np.max(posy)*0.5)
peaks, _ = find_peaks(posy_smooth, height=round(np.max(posy)*0.01))
peaks, _ = find_peaks(-posy_smooth)
print(peaks)
peakv = [posx[x] for x in peaks]
print(peakv)
#fig = plt.figure()
posdata = {'x':posx, 'y':posy}
posdata = pd.DataFrame(posdata)
kr = KernelReg(posy, posx, 'c')
posy_pre, posy_std = kr.fit(posx)
resi_cross, cros2x, fnr, fpr_t, nor_t, amp_r, sigma_r, center_r = modelpre(posdata)
fpt = fpr_t/nor_t
print(fnr, fpt)
#fig, (ax1, ax2) = plt.subplots(2,1)
#print(posdata['x'])
plt.scatter(posdata['x'],posdata['y']-gaussian(posdata['x'],amp_r,sigma_r,center_r),s=0.5, color='blue', alpha=0.5, label='residual')
#plt.scatter(posdata['x'],gaussian(posdata['x'],amp_r,sigma_r,center_r),s=1 ,label='best fit')
plt.plot(posdata['x'],gaussian(posdata['x'],amp_r,sigma_r,center_r),linewidth=0.5, color='red',label='best fit')
plt.vlines([resi_cross], 0, 2000000, linestyles=(0,(5,10)),linewidth=1, colors='black')
print(resi_cross)
#plt.set(ylim=(0, ymax))
plt.ylim(0,1500000)
plt.xlim(-2, 4)
#plt.xlim(0,1.8)
##left peak##
#tfpn= np.arange(resi_cross, 2, 1/100)
#plt.fill_between(x=tfpn,y1=gaussian(tfpn,amp_r,sigma_r,center_r), color='gray', alpha=0.2
#)
##left peak##
#tfnn = np.arange(-2, resi_cross, 0.05)
#plt.fill_between(
# x=posdata['x'], y1=posdata['y']-gaussian(posdata['x'],amp_r,sigma_r,center_r),
# where=(posdata['x']>0)&(posdata['x']<=resi_cross),
# color='red',
# alpha=0.2
#)
##right peak##
tfpn= np.arange(0,resi_cross+0.01, 1/100)
plt.fill_between(x=tfpn,y1=gaussian(tfpn,amp_r,sigma_r,center_r), color='red', alpha=0.2
)
tfnn = np.arange(resi_cross, 2, 0.05)
plt.fill_between(
x=posdata['x'], y1=posdata['y']-gaussian(posdata['x'],amp_r,sigma_r,center_r),
where=(posdata['x']>=resi_cross)&(posdata['x']<=2),
color='gray',
alpha=0.2
)
plt.style.use('seaborn-whitegrid')
#ax1.set_ylim(0, 400000)
#plt.scatter(negx, negy, s=2, label = 'AN')
#plt.scatter(chrx, chry, s=2, label = 'chr_AN')
plt.scatter(posx, posy, s=2, label = 'Brdu_1.5h Sb210', color='black', alpha=0.8)
#plt.scatter(negx, negy, s=1, label = 'AN')
#plt.plot(chrx, chry, label = 'chr_AN')
#plt.scatter(posx, posy, s=1, color='red', label = 'AN')
#plt.scatter(posx, posy_pre, s=1, label = 'AN_fit')
#plt.xticks([])
#x,y = ipddis('allat_ipd.csv')
#ax2.vlines([resi_cross], 0, 4000000, linestyles=(0,(5,10)),linewidth=1, colors='black')
#ax2.scatter(x, y, s=1.0, label = 'Mic', color='black', alpha=0.6)
#x,y = ipddis('allac_ipd.csv')
#ax2.scatter(x, y, s=1.0, label = 'Mito', color='blue', alpha=0.6)
#x,y = ipddis('allac_ipd.csv')
#plt.plot(x, y, linewidth=2.0, label = 'AC')
#x,y = ipddis('allag_ipd.csv')
#plt.plot(x, y, linewidth=2.0, label = 'AG')
plt.legend()
plt.show()