import sys
import hashlib
import streamlit as st
from streamlit.delta_generator import DeltaGenerator
import json
import os
import numpy as np
from directory_tree import display_tree
from pycaret.regression import RegressionExperiment
from pycaret.classification import ClassificationExperiment
PATH_TO_FL = "./pycaret-fl"
CONST_PATH_TO_DATA = "./data"
CONST_PATH_TO_MODELS = "./models"
def init_paths(scope, exp_id=None):
if exp_id:
PATH_TO_DATA = os.path.join(CONST_PATH_TO_DATA, exp_id)
PATH_TO_MODELS = os.path.join(CONST_PATH_TO_MODELS, exp_id)
else:
PATH_TO_DATA = CONST_PATH_TO_DATA
PATH_TO_MODELS = CONST_PATH_TO_MODELS
if not os.path.exists(PATH_TO_DATA):
os.makedirs(PATH_TO_DATA)
if not os.path.exists(PATH_TO_MODELS):
os.makedirs(PATH_TO_MODELS)
scope['PATH_TO_DATA'] = PATH_TO_DATA
scope['PATH_TO_MODELS'] = PATH_TO_MODELS
scope['PATH_TO_DATASET'] = f"{PATH_TO_DATA}/dataset.csv"
scope['PATH_TO_TRAIN_DATASET'] = f"{PATH_TO_DATA}/train_dataset.csv"
scope['PATH_TO_EVAL_DATASET'] = f"{PATH_TO_DATA}/eval_dataset.csv"
scope['PATH_TO_GEN_DATASET'] = f"{PATH_TO_DATA}/gen_dataset.csv"
def init_session():
if 'use_synth_data' not in st.session_state:
st.session_state['use_synth_data'] = False
if 'exp_id' not in st.session_state:
# load from config.json
if os.path.exists('config.json'):
with open('config.json', 'r') as f:
config = json.load(f)
st.session_state['exp_id'] = config.get('exp_id')
else:
st.session_state['exp_id'] = None
def update_exp_id(exp_id_ph: DeltaGenerator, exp_id: str):
if exp_id:
exp_id_ph.caption(f"Experiment ID: *{exp_id}*")
else:
exp_id_ph.info("Experiment not initialized, please upload a dataset")
def update_file_tree(ph, scope):
if (ptd := scope.get('PATH_TO_DATA')) and (ptm := scope.get('PATH_TO_MODELS')):
import seedir as sd
fd = sd.FakeDir("Structure")
fdd = fd.create_folder('data')
fdm = fd.create_folder('models')
fptd = sd.fakedir(ptd)
fdd._children = fptd._children
fptm = sd.fakedir(ptm)
fdm._children = fptm._children
filetree = fd.seedir(style='emoji', printout=False)
container = ph.container(height=310, border=True)
container.text(filetree)
def sidebar(scope):
st.set_page_config(layout="wide")
init_session()
# Custom CSS for changing the sidebar navigation
custom_css = """
<style>
[data-testid=stSidebarContent] {
overflow: overlay !important;
}
[data-testid=stSidebarNavItems] {
max-height: 75vh !important;
}
[data-testid=stSidebarUserContent] {
padding-bottom: 1rem !important;
}
</style>
"""
# Apply custom CSS
st.markdown(custom_css, unsafe_allow_html=True)
with st.sidebar:
st.checkbox("Use synthetic data", key="use_synth_data",
value=st.session_state['use_synth_data'], on_change=reset_run_btn)
exp_id = st.session_state['exp_id']
init_paths(scope, exp_id)
file_tree_ph = st.empty()
update_file_tree(file_tree_ph, scope)
exp_id_ph = st.empty()
update_exp_id(exp_id_ph, exp_id)
return exp_id_ph, file_tree_ph
def show_df(df):
st.dataframe(df, use_container_width=True)
def identify_problem_type(df, threshold=10):
# Check if the unique values in the column are less than a certain threshold
chosen_target = st.session_state.get('preserve_chosen_target')
unique_values = np.unique(df[chosen_target])
if len(unique_values) < threshold:
return "classification"
return "regression"
def get_plot_types(problem_type):
regression_plots = ["residuals", "error", "cooks",
"rfe", "learning", "vc", "manifold", "feature", "feature_all", "parameter"]
regression_plots = {
# 'Schematic drawing of the preprocessing pipeline': 'pipeline',
'Interactive Residual plots': 'residuals_interactive',
'Residuals Plot': 'residuals',
'Prediction Error Plot': 'error',
'Cooks Distance Plot': 'cooks',
'Recursive Feat. Selection': 'rfe',
'Learning Curve': 'learning',
'Validation Curve': 'vc',
'Manifold Learning': 'manifold',
'Feature Importance': 'feature',
'Feature Importance (All)': 'feature_all',
'Model Hyperparameter': 'parameter',
'Decision Tree': 'tree',
}
classification_plots = {
# 'Schematic drawing of the preprocessing pipeline': 'pipeline',
'Area Under the Curve': 'auc',
'Discrimination Threshold': 'threshold',
'Precision Recall Curve': 'pr',
'Confusion Matrix': 'confusion_matrix',
'Class Prediction Error': 'error',
'Classification Report': 'class_report',
'Decision Boundary': 'boundary',
'Recursive Feature Selection': 'rfe',
'Learning Curve': 'learning',
'Manifold Learning': 'manifold',
'Calibration Curve': 'calibration',
'Validation Curve': 'vc',
'Dimension Learning': 'dimension',
'Feature Importance': 'feature',
'Feature Importance (All)': 'feature_all',
'Model Hyperparameter': 'parameter',
'Lift Curve': 'lift',
'Gain Chart': 'gain',
'Decision Tree': 'tree',
'KS Statistic Plot': 'ks',
}
if problem_type == 'classification':
return classification_plots
elif problem_type == 'regression':
return regression_plots
def reset_run_btn():
st.session_state['run_mod_btn'] = False
@st.cache_resource()
def create_experiment(chosen_problem, df, target, ignore_features, exp_id):
if chosen_problem == 'classification':
exp = ClassificationExperiment()
elif chosen_problem == 'regression':
exp = RegressionExperiment()
exp.setup(df, target=target, verbose=False,
ignore_features=ignore_features, experiment_name=exp_id)
return exp