# -*- coding: utf-8 -*-
"""
plot_bayes_hyperparam.py

Plot the ten-fold cross-validation RMSE of random forest and gradient
boosting regressors with default vs. Bayes-optimized hyperparameters.

Created on Mon Jul 27 18:23:50 2020

@author: baum_c4
"""

import pickle

import numpy as np
import matplotlib.pyplot as plt

# Needed only for the commented-out cross-validation run below.
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from func_sorted_stratification import sorted_stratification

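# The optimized hyperparameters in 'best_bayes.pickle' were found by a
# Bayesian search that is not part of this script. A minimal sketch of that
# step, assuming scikit-optimize's BayesSearchCV was used (the search ranges
# below are illustrative, not necessarily the ones actually used):
#
# from skopt import BayesSearchCV
# rf_search = BayesSearchCV(RandomForestRegressor(random_state=42),
#                           {'max_depth': (2, 30),
#                            'min_samples_split': (2, 20),
#                            'min_samples_leaf': (1, 20),
#                            'max_features': (0.1, 1.0, 'uniform'),
#                            'min_impurity_decrease': (0.0, 0.1, 'uniform')},
#                           n_iter=50, cv=5, random_state=42)
# rf_search.fit(XTrain, YTrain[:, 2])
# RFparams = rf_search.best_params_  # GPparams: same idea for the GB model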
# The commented-out section below reproduces the per-fold RMSE values that
# are stored in 'plot_optimization_data.pickle'.
#
# Build the two optimized models from the best hyperparameters found by the
# Bayesian search.
# with open('best_bayes.pickle', 'rb') as f:
#     GPparams, RFparams = pickle.load(f)
# scopt_model = RandomForestRegressor(max_depth=RFparams['max_depth'],
#                                     n_estimators=500,
#                                     random_state=42,
#                                     max_features=RFparams['max_features'],
#                                     min_samples_split=RFparams['min_samples_split'],
#                                     min_samples_leaf=RFparams['min_samples_leaf'],
#                                     min_impurity_decrease=RFparams['min_impurity_decrease'],
#                                     # max_samples=RFparams['max_samples']
#                                     )
# gp_model = GradientBoostingRegressor(max_depth=GPparams['max_depth'],
#                                      n_estimators=GPparams['n_estimators'],
#                                      random_state=42,
#                                      max_features=GPparams['max_features'],
#                                      min_samples_split=GPparams['min_samples_split'],
#                                      min_samples_leaf=GPparams['min_samples_leaf'],
#                                      learning_rate=GPparams['learning_rate'],
#                                      min_impurity_decrease=GPparams['min_impurity_decrease'],
#                                      # subsample=GPparams['subsample']
#                                      )

# Load the learning set and split it into ten folds by sorted stratification.
# with open('delay_DST_learningset.pickle', 'rb') as f:
#     [learnvector, timevector] = pickle.load(f)
#
# X, Y = sorted_stratification(learnvector, timevector, 10, 1)
# baseforest = []  # default random forest, per-fold RMSE
# optforest = []   # optimized random forest
# basegp = []      # default gradient boosting
# optgp = []       # optimized gradient boosting
# for j in range(10):
#     # Leave fold j out for testing, train on the other nine.
#     XTrain = np.concatenate([X[i] for i in range(len(X)) if i != j])
#     YTrain = np.concatenate([Y[i] for i in range(len(X)) if i != j])
#     XTest = X[j]
#     YTest = Y[j]
#
#     # Random forest with default hyperparameters.
#     base_model = RandomForestRegressor(random_state=42)
#     base_model.fit(XTrain, YTrain[:, 2])
#     Y_pred = base_model.predict(XTest)
#     deltapred = Y_pred - YTest[:, 2]
#     # RMSE converted from seconds to minutes.
#     baseRMSEpred = np.sqrt(np.nansum(np.square(deltapred)) / len(XTest)) / 60
#     baseforest.append(baseRMSEpred)
#
#     # Random forest with Bayes-optimized hyperparameters.
#     scopt_model.fit(XTrain, YTrain[:, 2])
#     Y_scopt = scopt_model.predict(XTest)
#     scoptdelt = Y_scopt - YTest[:, 2]
#     scoptRMSEpred = np.sqrt(np.nansum(np.square(scoptdelt)) / len(XTest)) / 60
#     optforest.append(scoptRMSEpred)
#
#     # Gradient boosting with default hyperparameters.
#     basegp_model = GradientBoostingRegressor(random_state=42)
#     basegp_model.fit(XTrain, YTrain[:, 2])
#     Y_pred = basegp_model.predict(XTest)
#     deltapred = Y_pred - YTest[:, 2]
#     basegpRMSEpred = np.sqrt(np.nansum(np.square(deltapred)) / len(XTest)) / 60
#     basegp.append(basegpRMSEpred)
#
#     # Gradient boosting with Bayes-optimized hyperparameters.
#     gp_model.fit(XTrain, YTrain[:, 2])
#     Y_gp = gp_model.predict(XTest)
#     gppred = Y_gp - YTest[:, 2]
#     gpRMSEpred = np.sqrt(np.nansum(np.square(gppred)) / len(XTest)) / 60
#     optgp.append(gpRMSEpred)
#
#     print(baseRMSEpred, scoptRMSEpred, basegpRMSEpred, gpRMSEpred)
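# The four result lists were presumably written out for plotting along these
# lines (a sketch; the load below expects exactly this order):
# with open('plot_optimization_data.pickle', 'wb') as f:
#     pickle.dump([baseforest, basegp, optforest, optgp], f)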

with open('plot_optimization_data.pickle', 'rb') as f:
    baseforest, basegp, optforest, optgp = pickle.load(f)

# Append the mean over the ten folds as an eleventh bar group.
barWidth = 0.2
baseforest.append(np.mean(baseforest))
optforest.append(np.mean(optforest))
basegp.append(np.mean(basegp))
optgp.append(np.mean(optgp))
# Heights of the bars
bars1 = baseforest
bars2 = optforest
bars3 = basegp
bars4 = optgp

# x positions of the four bars in each group
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
r4 = [x + barWidth for x in r3]

fig, ax = plt.subplots(1, 1, figsize=(13 * 0.39, 9 * 0.39))  # 13 cm x 9 cm
ax.bar(r1, bars1, color='red', width=barWidth, edgecolor='white', label='RF-def')
ax.bar(r2, bars2, color='green', width=barWidth, edgecolor='white', label='RF-opt')
ax.bar(r3, bars3, color='magenta', width=barWidth, edgecolor='white', label='GB-def')
ax.bar(r4, bars4, color='blue', width=barWidth, edgecolor='white', label='GB-opt')
# Center the tick labels under each group of four bars.
ax.set_xticks([r + 1.5 * barWidth for r in range(len(bars1))])
ax.set_xticklabels([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, '  mean'])
ax.legend(bbox_to_anchor=(1.33, 1), loc='upper right', ncol=1)
ax.set_ylabel('RMSE [min]')
ax.set_xlabel('cross validation fold')
ax.set_title('Hyperparameter optimization')
ax.set_ylim(3, 6.5)
plt.savefig('plot_hyperparameter_bayes.pdf', bbox_inches='tight')

plt.show()