pages/7_📊✅_Evaluate.py · auto-fl-fit

import streamlit as st
from tools import *
import pandas as pd
from pycaret.regression import RegressionExperiment
from pycaret.classification import ClassificationExperiment
import time
from streamlit_extras.stateful_button import button
from sklearn.metrics import log_loss

# Render the shared sidebar; only the second returned value is kept.
_, file_tree_ph = sidebar(globals())

st.title("Evaluate models")

# Nothing can be evaluated until an experiment id is present in the session.
exp_id = st.session_state.get('exp_id')
if exp_id is None:
    st.write("Please select an experiment first.")
    st.stop()

# Load either the generated (synthetic) dataset or the training dataset,
# depending on the session flag.
use_synthetic_data = st.session_state.get('use_synth_data')
if use_synthetic_data:
    df = pd.read_csv(PATH_TO_GEN_DATASET)
else:
    df = pd.read_csv(PATH_TO_TRAIN_DATASET)

with st.container(border=True):
    # Column choices come from `df`, the dataset loaded earlier in this
    # script; the last column is pre-selected as the target.
    chosen_target = st.selectbox(
        'Choose the Target Column', df.columns,
        key="preserve_chosen_target", index=len(df.columns) - 1)
    chosen_ignore = st.multiselect(
        'Choose the Ignore Columns', df.columns, key="preserve_chosen_ignore")
    # Pre-select index 1 ('regression') when the helper reports a regression
    # problem, otherwise index 0 ('classification'). The comparison is
    # converted explicitly because `index` is documented as an int.
    problem_type = st.selectbox(
        'Choose the Problem Type', ['classification', 'regression'],
        key="preserve_chosen_problem",
        index=int(identify_problem_type(df) == 'regression'))

    # Create the PyCaret experiment matching the selected problem type.
    if problem_type == 'classification':
        exp = ClassificationExperiment()
    elif problem_type == 'regression':
        exp = RegressionExperiment()

    # The experiment is set up on the evaluation dataset, not on `df`.
    eval_df = pd.read_csv(PATH_TO_EVAL_DATASET)
    exp.setup(eval_df, target=chosen_target, verbose=False,
              ignore_features=chosen_ignore, experiment_name=exp_id)

    if problem_type == 'classification':
        # Extra metric: log loss computed on predicted probabilities,
        # where a lower score is better.
        exp.add_metric('logloss', 'Log Loss', log_loss,
                       greater_is_better=False, target="pred_proba")

    # List the saved models. Only the trailing ".pkl" extension is removed,
    # so a ".pkl" occurring elsewhere in a file name is left intact and the
    # name still maps back to the right file when loaded.
    models = [name[:-len('.pkl')] for name in os.listdir(PATH_TO_MODELS)
              if name.endswith('.pkl')]
    # Order models by the last underscore-separated token of their name.
    models = sorted(models, key=lambda name: name.split('_')[-1])
    chosen_models = st.multiselect(
        'Choose the Model', options=["ALL", *models],
        key="preserve_chosen_final_models")
    # "ALL" is a shortcut entry that expands to every available model.
    if "ALL" in chosen_models:
        chosen_models = models

# Start every rerun from an empty results table, so nothing stale is kept
# when the script stops early below.
st.session_state['preserve_results'] = pd.DataFrame()

if not chosen_models:
    st.stop()

# Evaluate each selected model on the evaluation dataset and collect its
# metrics table. Tables are gathered in a list and concatenated once after
# the loop, which avoids re-copying the accumulated frame on every iteration
# and avoids concatenating onto an empty DataFrame.
result_frames = []
for model in chosen_models:
    final_model = exp.load_model(f'{PATH_TO_MODELS}/{model}')
    exp.predict_model(final_model, data=eval_df, verbose=False)
    # Fetch the metrics table; presumably the one produced by the
    # predict_model call just above (pull() returns the latest table).
    new_res = exp.pull()
    # TODO: add column whether synthetic or not, whether federated or not
    if "fl_" in model:
        # add FL prefix to model name
        new_res['Model'] = "Federated " + new_res['Model']
    if "_gen" in model:
        # add Synthetic prefix to model name
        new_res['Model'] = "Synthetic " + new_res['Model']
    result_frames.append(new_res)

# chosen_models is non-empty here (guarded above), so the list has entries.
st.session_state['preserve_results'] = pd.concat(result_frames, axis=0)

# Show the results table stored in session state, full width and without
# the index column.
st.subheader("Results")
st.dataframe(
    st.session_state['preserve_results'],
    hide_index=True,
    use_container_width=True,
)