# -*- coding: utf-8 -*-
"""
Created on Tue May 26 16:49:43 2020

@author: baum_c4

Derives the RMSE and R^2 scores for the real-time scenario.

Inputs (read from the working directory):
    delay_DST_learningset.pickle -- [learnvector, timevector]
    best_bayes.pickle            -- (GPparams, RFparams), mappings holding the
                                    tuned hyper-parameters

Outputs (written to the working directory):
    plot_R2_realtime.pickle   -- [R2scopt, R2gb, R2flat, R2vec, R2linreg]
    plot_RMSE_realtime.pickle -- array [RMSEscopt, RMSEboost, RMSEvec,
                                        RMSEflat, RMSElinreg]

NOTE: the flat and vec entries appear in opposite order in the two output
files. That order is kept unchanged because downstream readers may rely on it.
"""
import pickle

import numpy as np
#import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
#from func_sorted_stratification import sorted_stratification
#import shap
#import matplotlib.pyplot as plt
#from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
#from sklearn.linear_model import LogisticRegression

# The first N_TRAIN cases are used for fitting, the remainder for evaluation.
N_TRAIN = 300
# Errors are divided by this factor so that they are expressed in minutes.
TO_MINUTES = 60
# Columns of the target array as they are used below.
# NOTE(review): the original comment lists the columns as "flat, vector and
# measured delay", yet the code reads the flat estimate from column 1 and the
# vector estimate from column 0 -- confirm against the script that builds
# the learning set.
VECTOR_COL = 0
FLAT_COL = 1
MEASURED_COL = 2


def _rmse(errors):
    """Return the root-mean-square of *errors*, ignoring NaN entries.

    The mean is taken over the valid (non-NaN) entries only. Dividing a
    NaN-skipping sum by the total number of samples would bias the result
    towards zero whenever values are missing.
    """
    return np.sqrt(np.nanmean(np.square(errors)))


def _r_squared(errors, measured):
    """Return the coefficient of determination 1 - SS_res / SS_tot.

    errors   -- prediction errors, already divided by TO_MINUTES
    measured -- measured targets in their original unit

    Both sums are evaluated on the same rows, namely those where neither the
    error nor the measurement is NaN, so that the two terms stay comparable.
    Assumes both arguments are numeric NumPy arrays of equal length.
    """
    valid = ~(np.isnan(errors) | np.isnan(measured))
    reference = measured[valid]
    ss_res = np.sum(np.square(errors[valid]))
    ss_tot = np.sum(
        np.square(reference / TO_MINUTES - np.mean(reference) / TO_MINUTES)
    )
    return 1 - ss_res / ss_tot


with open('delay_DST_learningset.pickle', 'rb') as f:
    [learnvector, timevector] = pickle.load(f)
#learnvector contains the 7 feature for 380 cases
#timevector contains flat, vector and measured delay

X = learnvector
Y = timevector
Xtrain = X[:N_TRAIN]
Xtest = X[N_TRAIN:]
Ytrain = Y[:N_TRAIN]
Ytest = Y[N_TRAIN:]

with open('best_bayes.pickle', 'rb') as f:
    GPparams, RFparams = pickle.load(f)

#optimized random forest
scopt_model = RandomForestRegressor(
    max_depth=RFparams['max_depth'],
    n_estimators=500,
    random_state=42,
    max_features=RFparams['max_features'],
    min_samples_split=RFparams['min_samples_split'],
    min_samples_leaf=RFparams['min_samples_leaf'],
    min_impurity_decrease=RFparams['min_impurity_decrease'],
    #max_samples=RFparams['max_samples']
)

#optimized gradient boost
gb_model = GradientBoostingRegressor(
    max_depth=GPparams['max_depth'],
    n_estimators=GPparams['n_estimators'],
    random_state=42,
    max_features=GPparams['max_features'],
    min_samples_split=GPparams['min_samples_split'],
    min_samples_leaf=GPparams['min_samples_leaf'],
    learning_rate=GPparams['learning_rate'],
    min_impurity_decrease=GPparams['min_impurity_decrease'],
    #subsample=GPparams['subsample']
)

measured_train = Ytrain[:, MEASURED_COL]
measured_test = Ytest[:, MEASURED_COL]

# Random forest: fit on the measured delay, evaluate on the held-out cases.
scopt_model.fit(Xtrain, measured_train)
Y_scopt = scopt_model.predict(Xtest)
scoptpred = (Y_scopt - measured_test) / TO_MINUTES  # prediction error in minutes
scoptpara = scopt_model.get_params()  # not used further in this script
RMSEscopt = _rmse(scoptpred)

# Gradient boosting.
gb_model.fit(Xtrain, measured_train)
Y_gb = gb_model.predict(Xtest)
gbpred = (Y_gb - measured_test) / TO_MINUTES
RMSEboost = _rmse(gbpred)

# Baselines: the flat and vector estimates stored alongside the measurement.
deltaflat = (Ytest[:, FLAT_COL] - measured_test) / TO_MINUTES
deltavec = (Ytest[:, VECTOR_COL] - measured_test) / TO_MINUTES
RMSEflat = _rmse(deltaflat)
RMSEvec = _rmse(deltavec)

# Ordinary least-squares reference model.
ylinreg = LinearRegression().fit(Xtrain, measured_train).predict(Xtest)
deltalinreg = (ylinreg - measured_test) / TO_MINUTES
RMSElinreg = _rmse(deltalinreg)

# Order is part of the output format: note vec comes before flat here.
RT_RMSES = np.array([RMSEscopt, RMSEboost, RMSEvec, RMSEflat, RMSElinreg])

R2scopt = _r_squared(scoptpred, measured_test)
R2gb = _r_squared(gbpred, measured_test)
R2flat = _r_squared(deltaflat, measured_test)
R2vec = _r_squared(deltavec, measured_test)
R2linreg = _r_squared(deltalinreg, measured_test)

# Order is part of the output format: note flat comes before vec here.
with open('plot_R2_realtime.pickle', 'wb') as f:
    pickle.dump([R2scopt, R2gb, R2flat, R2vec, R2linreg], f)
with open('plot_RMSE_realtime.pickle', 'wb') as f:
    pickle.dump(RT_RMSES, f)