import streamlit as st
from tools import *
import pandas as pd
from pycaret.regression import RegressionExperiment
from pycaret.classification import ClassificationExperiment
import time
from streamlit_extras.stateful_button import button
from sklearn.metrics import log_loss
# Page header. `sidebar` comes from the `tools` star-import and is handed this
# module's globals; presumably it draws the shared sidebar — confirm in tools.
_, file_tree_ph = sidebar(globals())
st.title("Evaluate models")

# The experiment id is required further down (it names the PyCaret
# experiment), so halt the script run when none has been selected yet.
exp_id = st.session_state.get('exp_id')
if exp_id is None:
    st.write("Please select an experiment first.")
    st.stop()

# Load the dataset whose columns drive the widgets below: the generated one
# when the 'use_synth_data' flag is set in the session, the training one
# otherwise.
use_synthetic_data = st.session_state.get('use_synth_data')
if use_synthetic_data:
    dataset_path = PATH_TO_GEN_DATASET
else:
    dataset_path = PATH_TO_TRAIN_DATASET
df = pd.read_csv(dataset_path)
# Maps each selectable problem type to the PyCaret experiment class that
# handles it. Insertion order is also the order shown in the selectbox.
experiment_classes = {
    'classification': ClassificationExperiment,
    'regression': RegressionExperiment,
}

# Configuration panel: target column, columns to ignore, and problem type.
# NOTE(review): indentation was reconstructed; only the three selection
# widgets are assumed to live inside the bordered container — confirm.
with st.container(border=True):
    # Defaults to the last column of the loaded dataset.
    chosen_target = st.selectbox(
        'Choose the Target Column', df.columns,
        key="preserve_chosen_target", index=len(df.columns) - 1)
    chosen_ignore = st.multiselect(
        'Choose the Ignore Columns', df.columns, key="preserve_chosen_ignore")
    # Preselect 'regression' (position 1) when identify_problem_type() says
    # so, otherwise 'classification' (position 0). The comparison is cast to
    # int so the widget receives an explicit integer index rather than a bool.
    chosen_problem = st.selectbox(
        'Choose the Problem Type', list(experiment_classes),
        key="preserve_chosen_problem",
        index=int(identify_problem_type(df) == 'regression'))

# create exp
# A dispatch-table lookup fails right here on an unexpected problem type,
# instead of leaving `exp` unbound and raising NameError at exp.setup().
exp = experiment_classes[chosen_problem]()

# The experiment is set up on the evaluation dataset; the target and ignored
# columns were picked from the columns of `df` above.
eval_df = pd.read_csv(PATH_TO_EVAL_DATASET)
exp.setup(eval_df, target=chosen_target, verbose=False,
          ignore_features=chosen_ignore, experiment_name=exp_id)

# Log loss is registered only for classification runs; it is scored on
# predicted probabilities and lower values are better.
if chosen_problem == 'classification':
    exp.add_metric('logloss', 'Log Loss', log_loss,
                   greater_is_better=False, target="pred_proba")
# show list of files in the models directory
# Keep only pickled models and drop just the trailing ".pkl". Slicing off the
# suffix (rather than str.replace) leaves any ".pkl" occurring earlier in the
# file name intact, so the name still matches the file when it is loaded.
models = [
    file_name[:-len('.pkl')]
    for file_name in os.listdir(PATH_TO_MODELS)
    if file_name.endswith('.pkl')
]
# sort models by last word in the name (the part after the final underscore)
models.sort(key=lambda model_name: model_name.split('_')[-1])

chosen_models = st.multiselect(
    'Choose the Model', options=["ALL", *models], key="preserve_chosen_final_models")
# "ALL" is a pseudo-entry that expands to every discovered model.
if "ALL" in chosen_models:
    chosen_models = models

# Start every run from an empty results table, then halt the script run if
# there is nothing to evaluate.
st.session_state['preserve_results'] = pd.DataFrame()
if not chosen_models:
    st.stop()
# load models
# Score every chosen model on the evaluation data and gather one metrics
# frame per model. Frames are collected in a list and concatenated once after
# the loop, instead of re-copying the accumulated table on every iteration.
result_frames = []
for model in chosen_models:
    final_model = exp.load_model(f'{PATH_TO_MODELS}/{model}')
    exp.predict_model(final_model, data=eval_df, verbose=False)
    # pull() fetches the metrics table produced by the predict_model call.
    new_res = exp.pull()
    # TODO: add column whether synthetic or not, whether federated or not
    if "fl_" in model:
        # add FL prefix to model name
        new_res['Model'] = "Federated " + new_res['Model']
    if "_gen" in model:
        # add Synthetic prefix to model name
        new_res['Model'] = "Synthetic " + new_res['Model']
    result_frames.append(new_res)

# Append the new rows to whatever is currently stored under
# 'preserve_results', keeping the per-model order of the loop.
st.session_state['preserve_results'] = pd.concat(
    [st.session_state['preserve_results'], *result_frames], axis=0)

st.subheader("Results")
st.dataframe(st.session_state['preserve_results'],
             use_container_width=True, hide_index=True)