"""Bimodal (clinical + microarray) breast-cancer classifier on METABRIC.

Builds two pre-trained subnetworks (microarray and clinical), concatenates
their last hidden layers into a merged head, loads saved weights, and
evaluates the merged model on the held-out test split.  The commented-out
``model.compile/fit/save`` recipes document how each weight file was produced.
"""

import os
import random

import numpy as np
import tensorflow as tf

# Reproducibility: pin every seed source BEFORE any Keras/TF graph state is
# created (order matters for TF1-style sessions).
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(0)
random.seed(0)
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)

from keras import backend as K

tf.set_random_seed(0)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

from numpy import genfromtxt
import matplotlib.pyplot as plt
import pandas as pd
from keras.regularizers import l2
from keras.utils import to_categorical
from keras.optimizers import SGD, Nadam, Adam
# from keras.layers import Input, Dense, Activation, Dropout, BatchNormalization, Flatten, Merge, merge
from keras.layers import Input, Dense, Activation, Dropout, BatchNormalization, Flatten, merge
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Model, model_from_json
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from keras.constraints import maxnorm
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from keras.layers import concatenate
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve, auc

# Unnormalized [clinical, array] feature matrix; the first `num_clinical`
# columns are clinical covariates, the rest are microarray features.
data = np.load('metabric.npz', allow_pickle=True)
x_train = data['X_rest']
y_train = data['y_rest']
x_test = data['X_test']
y_test = data['y_test']
num_clinical = 10

# Min-max normalization using TRAIN statistics only (no test-set leakage).
MIN = x_train.min(axis=0)
MAX = x_train.max(axis=0)
# Columns 0, 2, 6: the continuous clinical covariates (the remaining clinical
# columns are presumably categorical/binary and are left unscaled — TODO confirm).
for col in [0, 2, 6]:
    x_train[:, col] = (x_train[:, col] - MIN[col]) / (MAX[col] - MIN[col])
    x_test[:, col] = (x_test[:, col] - MIN[col]) / (MAX[col] - MIN[col])
# Microarray columns.  NOTE(review): the hard cap of 30 means only columns
# 10..29 are scaled — confirm the matrix really has 30 columns, otherwise any
# later microarray columns stay unnormalized.
for col in range(num_clinical, 30):
    x_train[:, col] = (x_train[:, col] - MIN[col]) / (MAX[col] - MIN[col])
    x_test[:, col] = (x_test[:, col] - MIN[col]) / (MAX[col] - MIN[col])


def create_model(neuron, activation, l2term, lr, lr_decay, rate, norm):
    """Build the merged bimodal model with FROZEN pre-trained subnetworks.

    Both the microarray tower (3 hidden layers) and the clinical tower
    (2 hidden layers) are created with ``trainable=False`` and their saved
    weights are loaded; only the merge head uses the hyperparameters passed in.

    Args:
        neuron:      width of the merge hidden layer.
        activation:  activation of the merge hidden layer.
        l2term:      L2 penalty on the merge hidden layer.
        lr, lr_decay: unused here — kept for interface compatibility with the
                     grid-search/compile code (the optimizer is configured at
                     the call site).
        rate:        dropout rate after the merge hidden layer.
        norm:        max-norm constraint on the merge hidden layer.

    Returns:
        Uncompiled two-input Keras ``Model`` ([array, clinical] -> sigmoid).
    """
    # --- microarray subnetwork (frozen; weights loaded from disk) ---
    neuron_a = 20
    activation_a = 'relu'
    l2term_a = 0.0001
    rate_a = 0.05
    norm_a = 1
    n_array_features = x_train.shape[1] - num_clinical
    input_array = Input(shape=(n_array_features,))
    hidden1_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a),
                           trainable=False)(input_array)
    hidden1_array = Dropout(rate_a)(hidden1_array_)
    hidden2_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a),
                           trainable=False)(hidden1_array)
    hidden2_array = Dropout(rate_a)(hidden2_array_)
    hidden3_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a),
                           trainable=False)(hidden2_array)
    hidden3_array = Dropout(rate_a)(hidden3_array_)
    Output_array = Dense(1, activation='sigmoid', trainable=False)(hidden3_array)
    model_array = Model(inputs=input_array, outputs=Output_array)
    model_array.load_weights('bimodal_array.hdf5')
    # return model_array  # uncomment the above line to pre-train microarray subnetwork

    # --- clinical subnetwork (frozen; weights loaded from disk) ---
    neuron_c = 5
    activation_c = 'tanh'
    l2term_c = 0.0001
    rate_c = 0.05
    norm_c = 1
    input_clinical = Input(shape=(num_clinical,))
    hidden1_clinical_ = Dense(neuron_c, kernel_initializer='glorot_normal',
                              kernel_regularizer=l2(l2term_c),
                              activation=activation_c,
                              kernel_constraint=maxnorm(norm_c),
                              trainable=False)(input_clinical)
    hidden1_clinical = Dropout(rate_c)(hidden1_clinical_)
    hidden2_clinical_ = Dense(neuron_c, kernel_initializer='glorot_normal',
                              kernel_regularizer=l2(l2term_c),
                              activation=activation_c,
                              kernel_constraint=maxnorm(norm_c),
                              trainable=False)(hidden1_clinical)
    hidden2_clinical = Dropout(rate_c)(hidden2_clinical_)
    # NOTE(review): unlike Output_array, this output layer is NOT frozen
    # (no trainable=False) — confirm whether that asymmetry is intentional.
    Output_clinical = Dense(1, activation='sigmoid')(hidden2_clinical)
    model_clinical = Model(inputs=input_clinical, outputs=Output_clinical)
    model_clinical.load_weights('bimodal_clinical.hdf5')
    # return model_clinical  # uncomment the above line to pre-train clinical subnetwork

    # --- merge head: concatenate the last hidden layers of both towers ---
    merge_vector = concatenate([hidden3_array, hidden2_clinical], axis=-1)
    # merge_vector = merge([hidden3_array, hidden2_clinical], mode='concat', concat_axis=1)
    hidden1_merge_ = Dense(neuron, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term),
                           activation=activation,
                           kernel_constraint=maxnorm(norm))(merge_vector)
    hidden1_merge = Dropout(rate)(hidden1_merge_)
    Output_merge = Dense(1, activation='sigmoid')(hidden1_merge)
    model_merge = Model(inputs=[input_array, input_clinical], outputs=Output_merge)
    return model_merge


# merged network
def create_model_2(neuron, activation, l2term, lr, lr_decay, rate, norm):
    """Build the fully-TRAINABLE merged bimodal model and load saved weights.

    Same topology as :func:`create_model` but with no frozen layers, loading
    the subnetwork and merge weights from ``../../model/bimodal/``.

    Args/returns: same as :func:`create_model` (``lr``/``lr_decay`` unused).
    """
    # --- microarray subnetwork (trainable) ---
    neuron_a = 20
    activation_a = 'relu'
    l2term_a = 0.0001
    rate_a = 0.05
    norm_a = 1
    n_array_features = x_train.shape[1] - num_clinical
    input_array = Input(shape=(n_array_features,))
    hidden1_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a))(input_array)
    hidden1_array = Dropout(rate_a)(hidden1_array_)
    hidden2_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a))(hidden1_array)
    hidden2_array = Dropout(rate_a)(hidden2_array_)
    hidden3_array_ = Dense(neuron_a, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term_a),
                           activation=activation_a,
                           kernel_constraint=maxnorm(norm_a))(hidden2_array)
    hidden3_array = Dropout(rate_a)(hidden3_array_)
    Output_array = Dense(1, activation='sigmoid')(hidden3_array)
    model_array = Model(inputs=input_array, outputs=Output_array)
    model_array.load_weights('../../model/bimodal/bimodal_array.hdf5')
    # return model_array

    # --- clinical subnetwork (trainable) ---
    neuron_c = 5
    activation_c = 'tanh'
    l2term_c = 0.0001
    rate_c = 0.05
    norm_c = 1
    input_clinical = Input(shape=(num_clinical,))
    hidden1_clinical_ = Dense(neuron_c, kernel_initializer='glorot_normal',
                              kernel_regularizer=l2(l2term_c),
                              activation=activation_c,
                              kernel_constraint=maxnorm(norm_c))(input_clinical)
    hidden1_clinical = Dropout(rate_c)(hidden1_clinical_)
    hidden2_clinical_ = Dense(neuron_c, kernel_initializer='glorot_normal',
                              kernel_regularizer=l2(l2term_c),
                              activation=activation_c,
                              kernel_constraint=maxnorm(norm_c))(hidden1_clinical)
    hidden2_clinical = Dropout(rate_c)(hidden2_clinical_)
    Output_clinical = Dense(1, activation='sigmoid')(hidden2_clinical)
    model_clinical = Model(inputs=input_clinical, outputs=Output_clinical)
    model_clinical.load_weights('../../model/bimodal/bimodal_clinical.hdf5')
    # return model_clinical

    # --- merge head; also restores the previously trained merge weights ---
    merge_vector = concatenate([hidden3_array, hidden2_clinical], axis=-1)
    # merge_vector = merge([hidden3_array, hidden2_clinical], mode='concat', concat_axis=1)
    hidden1_merge_ = Dense(neuron, kernel_initializer='glorot_normal',
                           kernel_regularizer=l2(l2term),
                           activation=activation,
                           kernel_constraint=maxnorm(norm))(merge_vector)
    hidden1_merge = Dropout(rate)(hidden1_merge_)
    Output_merge = Dense(1, activation='sigmoid')(hidden1_merge)
    model_merge = Model(inputs=[input_array, input_clinical], outputs=Output_merge)
    model_merge.load_weights('../../model/bimodal/bimodal_merge.hdf5')
    return model_merge


# --- model construction and evaluation -------------------------------------
# NOTE(review): this first model is immediately replaced by create_model_2()
# below; it is only needed when re-running the commented training recipes.
model = create_model(neuron=10, activation='tanh', l2term=0.0001,
                     lr=0.00002, lr_decay=0.004, rate=0.05, norm=1)

# array
# model.compile(loss='binary_crossentropy',
#               optimizer=Nadam(lr=0.0002, schedule_decay=0.004))
# model.fit(x_train[:, num_clinical:], y_train, batch_size=10, epochs=200, verbose=1)
# model.save('../../model/bimodal/bimodal_array.hdf5')
# model.load_weights('../../model/bimodal/bimodal_array.hdf5')

# clinical
# model.compile(loss='binary_crossentropy',
#               optimizer=Nadam(lr=0.002, schedule_decay=0.004))
# model.fit(x_train[:, :num_clinical], y_train, batch_size=10, epochs=800, verbose=1)
# model.save('../../model/bimodal/bimodal_clinical.hdf5')
# model.load_weights('../../model/bimodal/bimodal_clinical.hdf5')

# merge
# model.compile(loss='binary_crossentropy',
#               optimizer=Nadam(lr=0.00002, schedule_decay=0.004))
# model.fit([x_train[:, num_clinical:], x_train[:, :num_clinical]], y_train, batch_size=10, epochs=100, verbose=1)
# model.save('../../model/bimodal/bimodal_merge.hdf5')
# model.load_weights('../../model/bimodal/bimodal_merge.hdf5')

model = create_model_2(neuron=10, activation='tanh', l2term=0.0001,
                       lr=0.00002, lr_decay=0.004, rate=0.05, norm=1)
# model.compile(loss='binary_crossentropy',
#               optimizer=Nadam(lr=0.00002, schedule_decay=0.004))
# model.fit([x_train[:, num_clinical:], x_train[:, :num_clinical]], y_train, batch_size=10, epochs=100, verbose=1)
# model.save('../../model/bimodal/bimodal_merge_2.hdf5')
model.load_weights('../../model/bimodal/bimodal_merge_2.hdf5')

# independent validation
# data_indep = np.load('../../data/breast/indep_valid_geos_21653.npz', allow_pickle=True)
# x_test = data_indep['exprs']
# for col in [0, 2, 6]: x_test[:, col] = (x_test[:, col] - MIN[col]) / (MAX[col] - MIN[col])
# y_test = data_indep['labs']
# logits = model.predict(x_test)
# fpr, tpr, thr = roc_curve(y_test.astype(int), logits, pos_label=1)
# print(auc(fpr, tpr))
# np.savez_compressed('../../model/bimodal/breast_bimodal_indep_logits.npz', pred=logits)

# Test-set AUC of the merged model (inputs: [array features, clinical features]).
# logits = model.predict(x_test[:, num_clinical:])
# logits = model.predict(x_test[:, :num_clinical])
logits = model.predict([x_test[:, num_clinical:], x_test[:, :num_clinical]])
fpr, tpr, thr = roc_curve(y_test.astype(int), logits, pos_label=1)
print(auc(fpr, tpr))

# Pick a decision threshold by Youden's J statistic (max tpr - fpr) on the
# last 25% of the training split used as a validation set.
valid_num = int(np.shape(x_train)[0] * 0.25)
x_valid = x_train[-valid_num:, :]
y_valid = y_train[-valid_num:]
y_pred_valid = model.predict([x_valid[:, num_clinical:], x_valid[:, :num_clinical]])
print(np.shape(x_valid))
print(np.shape(y_valid))
fpr, tpr, thr = roc_curve(y_valid, y_pred_valid)
thr_best = thr[np.argmax(np.subtract(tpr, fpr))]
# np.savez_compressed('../../model/bimodal/breast_bimodal_logits.npz', pred=logits, thr=thr_best)