# -*- coding: utf-8 -*-
"""
Created on Mon Jul 27 17:28:57 2020

@author: baum_c4

Performs hyperparameter search based on scikit-optimize/skopt.

Loads a pre-pickled learning set, splits it into 10 sorted-stratified
folds (fold ``j`` held out for testing), runs a Bayesian hyperparameter
search (``BayesSearchCV``) for a GradientBoostingRegressor and a
RandomForestRegressor on the target in column 2 of Y, prints
validation/test scores, and pickles the best parameter sets plus the
train/test split for later reuse.
"""
from skopt import BayesSearchCV
# NOTE(review): the following sklearn/skopt imports are currently unused
# (load_digits, SVC, train_test_split, Real, Integer, use_named_args);
# kept for backward compatibility — confirm before removing.
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from func_sorted_stratification import sorted_stratification
from sklearn.ensemble import GradientBoostingRegressor
from skopt.space import Real, Integer
from skopt.utils import use_named_args
import pickle
import numpy as np

# Seed used for all estimators and searches so runs are reproducible.
# (Previously assigned but never passed anywhere.)
random = 42

# Load the precomputed learning set: feature vectors and time/target vectors.
with open('delay_DST_learningset.pickle', 'rb') as f:
    [learnvector, timevector] = pickle.load(f)

# Split into 10 folds via sorted stratification (project helper; presumably
# orders samples by target before striping them into folds — confirm in
# func_sorted_stratification).
X, Y = sorted_stratification(learnvector, timevector, 10, 0)

j = 5  # index of the fold held out as the test set
XTrain = np.concatenate([X[i] for i in range(len(X)) if i != j])
YTrain = np.concatenate([Y[i] for i in range(len(X)) if i != j])
XTest = X[j]
YTest = Y[j]

n_features = XTrain.shape[1]

# Bayesian search for GradientBoostingRegressor hyperparameters.
# Tuple bounds are interpreted by skopt: int/int -> Integer dimension,
# float bounds -> Real dimension ('log-uniform' samples p = exp(x)).
opt = BayesSearchCV(
    GradientBoostingRegressor(random_state=random),
    {
        'learning_rate': (1e-2, 1e0, 'log-uniform'),
        'max_depth': (1, 10),
        'max_features': (1, n_features),
        'min_samples_split': (2, 20),
        'min_samples_leaf': (1, 20),
        'n_estimators': (5, 500),
        'min_impurity_decrease': (0.0, 0.3),
    },
    n_iter=100,
    cv=5,
    random_state=random,
)
# Column 2 of Y is the regression target used throughout.
opt.fit(XTrain, YTrain[:, 2])
print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(XTest, YTest[:, 2]))

# Bayesian search for RandomForestRegressor hyperparameters
# (n_estimators fixed at 500; only tree-shape parameters are searched).
optRF = BayesSearchCV(
    RandomForestRegressor(n_estimators=500, random_state=random),
    {
        'max_depth': (1, 20),
        'max_features': (1, n_features),
        'min_samples_split': (2, 20),
        'min_samples_leaf': (1, 20),
        'min_impurity_decrease': (0.0, 0.3),
    },
    n_iter=100,
    cv=5,
    random_state=random,
)
optRF.fit(XTrain, YTrain[:, 2])

GPparams = opt.best_params_
RFparams = optRF.best_params_
print("val. score: %s" % optRF.best_score_)
print("test score: %s" % optRF.score(XTest, YTest[:, 2]))

# Persist the winning parameter sets and the exact data split used.
with open('best_bayes.pickle', 'wb') as f:
    pickle.dump([GPparams, RFparams], f)
with open('best_bayes_data.pickle', 'wb') as f:
    pickle.dump([XTrain, YTrain, XTest, YTest], f)