# Streamlit page "Evaluate models": scores every selected saved model on the
# evaluation dataset with PyCaret and shows the collected metrics in one table.
import os  # explicit: os.listdir is used below; don't rely on the star import

import streamlit as st
# Presumably provides sidebar, identify_problem_type and the PATH_TO_*
# constants used below (none of them is defined in this file).
from tools import *
import pandas as pd
from pycaret.regression import RegressionExperiment
from pycaret.classification import ClassificationExperiment
import time
from streamlit_extras.stateful_button import button
from sklearn.metrics import log_loss

# File extension of the saved models listed from PATH_TO_MODELS.
_MODEL_SUFFIX = ".pkl"

_, file_tree_ph = sidebar(globals())
st.title("Evaluate models")

# Nothing can be evaluated without an experiment id in the session state.
exp_id = st.session_state.get('exp_id')
if exp_id is None:
    st.write("Please select an experiment first.")
    st.stop()

# This dataset only feeds the column selectors and the default problem type;
# the experiment itself is set up on the evaluation dataset further down.
use_synthetic_data = st.session_state.get('use_synth_data')
df = pd.read_csv(
    PATH_TO_GEN_DATASET if use_synthetic_data else PATH_TO_TRAIN_DATASET)

# NOTE(review): the original file's indentation was lost; it is assumed that
# only the three selectors live inside the bordered container - confirm.
with st.container(border=True):
    # The last column is pre-selected as the target.
    chosen_target = st.selectbox(
        'Choose the Target Column', df.columns,
        key="preserve_chosen_target", index=len(df.columns) - 1)
    chosen_ignore = st.multiselect(
        'Choose the Ignore Columns', df.columns,
        key="preserve_chosen_ignore")
    # index must be an int: 0 -> 'classification', 1 -> 'regression'.
    chosen_problem = st.selectbox(
        'Choose the Problem Type', ['classification', 'regression'],
        key="preserve_chosen_problem",
        index=int(identify_problem_type(df) == 'regression'))

# create exp (the selectbox only offers the two options handled here)
is_classification = chosen_problem == 'classification'
if is_classification:
    exp = ClassificationExperiment()
else:
    exp = RegressionExperiment()

eval_df = pd.read_csv(PATH_TO_EVAL_DATASET)
exp.setup(eval_df, target=chosen_target, verbose=False,
          ignore_features=chosen_ignore, experiment_name=exp_id)

if is_classification:
    # Log loss is scored on predicted probabilities; lower is better.
    exp.add_metric('logloss', 'Log Loss', log_loss,
                   greater_is_better=False, target="pred_proba")

# show list of files in the models directory; strip only the trailing
# extension so a ".pkl" elsewhere in the file name is left untouched
models = [
    i[:-len(_MODEL_SUFFIX)]
    for i in os.listdir(PATH_TO_MODELS)
    if i.endswith(_MODEL_SUFFIX)
]
# sort models by last word in the name
models = sorted(models, key=lambda x: x.split('_')[-1])
chosen_models = st.multiselect(
    'Choose the Model', options=["ALL", *models],
    key="preserve_chosen_final_models")
if "ALL" in chosen_models:
    chosen_models = models

# Reset the stored results on every run so stale rows never linger.
st.session_state['preserve_results'] = pd.DataFrame()
if not chosen_models:
    st.stop()

# load models, score each one on the evaluation data and collect the metrics
results = []
for model in chosen_models:
    final_model = exp.load_model(f'{PATH_TO_MODELS}/{model}')
    exp.predict_model(final_model, data=eval_df, verbose=False)
    # pull() returns the metrics table produced by the predict_model call
    new_res = exp.pull()
    # TODO: add column whether synthetic or not, whether federated or not
    if "fl_" in model:
        # add FL prefix to model name
        new_res['Model'] = "Federated " + new_res['Model']
    if "_gen" in model:
        # add Synthetic prefix to model name
        new_res['Model'] = "Synthetic " + new_res['Model']
    results.append(new_res)

# One concat at the end instead of growing a frame inside the loop.
st.session_state['preserve_results'] = pd.concat(results, axis=0)

st.subheader("Results")
st.dataframe(st.session_state['preserve_results'],
             use_container_width=True, hide_index=True)