# ml-solarwind / realtime_machinelearning.py
# -*- coding: utf-8 -*-

"""
Created on Tue May 26 16:49:43 2020

@author: baum_c4

derives the RMSEs and R^2 scores for the real-time scenario

"""

import pickle
import numpy as np
#import pandas as pd

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
#from func_sorted_stratification import sorted_stratification
#import shap
#import matplotlib.pyplot as plt
#from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
#from sklearn.linear_model import LogisticRegression
with open('delay_DST_learningset.pickle', 'rb') as f:
    learnvector, timevector = pickle.load(f)
# learnvector contains the 7 features for the 380 cases
# timevector contains the flat, vector and measured propagation delays
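
# Hedged sanity check (not part of the original analysis): the comments above
# imply shapes of roughly (380, 7) for the features and (380, 3) for the
# delays; printing the shapes just makes that assumption visible at run time.
print('feature matrix shape:', np.shape(learnvector))
print('delay matrix shape:', np.shape(timevector))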



X = learnvector
Y = timevector
# train/test split: first 300 cases for training, remaining 80 for testing
Xtrain = X[:300]
Xtest = X[300:]
Ytrain = Y[:300]
Ytest = Y[300:]


# hyperparameters found by the Bayesian optimization:
# GPparams for gradient boosting, RFparams for the random forest
with open('best_bayes.pickle', 'rb') as f:
    GPparams, RFparams = pickle.load(f)

# optimized random forest
scopt_model = RandomForestRegressor(max_depth=RFparams['max_depth'],
                                    n_estimators=500,
                                    random_state=42,
                                    max_features=RFparams['max_features'],
                                    min_samples_split=RFparams['min_samples_split'],
                                    min_samples_leaf=RFparams['min_samples_leaf'],
                                    min_impurity_decrease=RFparams['min_impurity_decrease'],
                                    #max_samples=RFparams['max_samples']
                                    )
# optimized gradient boosting
gb_model = GradientBoostingRegressor(max_depth=GPparams['max_depth'],
                                     n_estimators=GPparams['n_estimators'],
                                     random_state=42,
                                     max_features=GPparams['max_features'],
                                     min_samples_split=GPparams['min_samples_split'],
                                     min_samples_leaf=GPparams['min_samples_leaf'],
                                     learning_rate=GPparams['learning_rate'],
                                     min_impurity_decrease=GPparams['min_impurity_decrease'],
                                     #subsample=GPparams['subsample']
                                     )

scopt_model.fit(Xtrain, Ytrain[:, 2])  # column 2: measured delay
Y_scopt = scopt_model.predict(Xtest)
scoptpred = (Y_scopt - Ytest[:, 2]) / 60  # prediction error in minutes

scoptpara = scopt_model.get_params()
RMSEscopt = np.sqrt(np.nansum(np.square(scoptpred)) / len(Xtest))
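
# The RMSE above and those computed below all follow the same pattern,
# RMSE = sqrt( sum_i (prediction_i - measurement_i)^2 / N ), with the errors
# converted from seconds to minutes first. A small helper that is equivalent to
# the inline expressions (hypothetical, not used elsewhere in this script):
def rmse_minutes(residuals_minutes):
    """RMSE of a residual vector that is already expressed in minutes."""
    return np.sqrt(np.nansum(np.square(residuals_minutes)) / len(residuals_minutes))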




gb_model.fit(Xtrain, Ytrain[:, 2])
Y_gb = gb_model.predict(Xtest)
gbpred = (Y_gb - Ytest[:, 2]) / 60  # prediction error in minutes
RMSEboost = np.sqrt(np.nansum(np.square(gbpred)) / len(Xtest))

# baseline errors of the flat and vector propagation-delay estimates
deltaflat = (Ytest[:, 1] - Ytest[:, 2]) / 60
deltavec = (Ytest[:, 0] - Ytest[:, 2]) / 60

RMSEflat = np.sqrt(np.nansum(np.square(deltaflat)) / len(Xtest))
RMSEvec = np.sqrt(np.nansum(np.square(deltavec)) / len(Xtest))


# ordinary linear regression baseline
ylinreg = LinearRegression().fit(Xtrain, Ytrain[:, 2]).predict(Xtest)
deltalinreg = (ylinreg - Ytest[:, 2]) / 60
RMSElinreg = np.sqrt(np.nansum(np.square(deltalinreg)) / len(Xtest))

RT_RMSES = np.array([RMSEscopt, RMSEboost, RMSEvec, RMSEflat, RMSElinreg])


# R^2 = 1 - SS_res / SS_tot, with the total sum of squares taken over the
# measured delays (in minutes) around their mean
meanYtest = np.mean(Ytest[:, 2])
msemeasured = np.nansum(np.square(Ytest[:, 2] / 60 - meanYtest / 60))

msescopt = np.nansum(np.square(scoptpred))
msegb = np.nansum(np.square(gbpred))
mseflat = np.nansum(np.square(deltaflat))
msevec = np.nansum(np.square(deltavec))
mselinreg = np.nansum(np.square(deltalinreg))

R2scopt = 1 - msescopt / msemeasured
R2gb = 1 - msegb / msemeasured
R2flat = 1 - mseflat / msemeasured
R2vec = 1 - msevec / msemeasured
R2linreg = 1 - mselinreg / msemeasured
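
# Hedged cross-check (assumption: no NaNs in the test-set delays, so np.nansum
# and a plain sum agree): the hand-rolled R^2 values above should match
# sklearn.metrics.r2_score on the same predictions, e.g. for the random forest:
from sklearn.metrics import r2_score
print('R2 random forest (manual):', R2scopt)
print('R2 random forest (sklearn):', r2_score(Ytest[:, 2] / 60, Y_scopt / 60))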
                   
                   
with open('plot_R2_realtime.pickle', 'wb') as f:
    pickle.dump([R2scopt,R2gb,R2flat,R2vec,R2linreg], f)



with open('plot_RMSE_realtime.pickle', 'wb') as f:
    pickle.dump(RT_RMSES, f)
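
# The pickled results would presumably be reloaded by a plotting script along
# these lines (sketch only, not part of this file):
# with open('plot_RMSE_realtime.pickle', 'rb') as f:
#     rt_rmses = pickle.load(f)  # order: scopt, boost, vec, flat, linreg
# with open('plot_R2_realtime.pickle', 'rb') as f:
#     r2_scopt, r2_gb, r2_flat, r2_vec, r2_linreg = pickle.load(f)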