Phase-prediction-of-HEAs-private-share / Hyperparameter_tuning_of_RFC-model.ipynb
Hyperparameter_tuning_of_RFC-model.ipynb
Raw
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X5AMaKsOmkjy"
      },
      "source": [
        "## 1) Problem statement.\n",
        "Phase prediction of HEAs. Hyperparameter tuning without SMOTETomek."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 90,
      "metadata": {
        "id": "GTwZRJK4jxLd",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4b93fa27-1f8b-408b-96e2-26659b943edf"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: optuna in /usr/local/lib/python3.8/dist-packages (3.1.0)\n",
            "Requirement already satisfied: cmaes>=0.9.1 in /usr/local/lib/python3.8/dist-packages (from optuna) (0.9.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from optuna) (4.64.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from optuna) (1.21.6)\n",
            "Requirement already satisfied: colorlog in /usr/local/lib/python3.8/dist-packages (from optuna) (6.7.0)\n",
            "Requirement already satisfied: alembic>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from optuna) (1.9.4)\n",
            "Requirement already satisfied: sqlalchemy>=1.3.0 in /usr/local/lib/python3.8/dist-packages (from optuna) (1.4.46)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.8/dist-packages (from optuna) (6.0)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from optuna) (23.0)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.8/dist-packages (from alembic>=1.5.0->optuna) (6.0.0)\n",
            "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.8/dist-packages (from alembic>=1.5.0->optuna) (5.10.2)\n",
            "Requirement already satisfied: Mako in /usr/local/lib/python3.8/dist-packages (from alembic>=1.5.0->optuna) (1.2.4)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.8/dist-packages (from sqlalchemy>=1.3.0->optuna) (2.0.2)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata->alembic>=1.5.0->optuna) (3.13.0)\n",
            "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.8/dist-packages (from Mako->alembic>=1.5.0->optuna) (2.0.1)\n"
          ]
        }
      ],
      "source": [
        "# Install Optuna into the running kernel's environment (%pip targets the kernel,\n",
        "# unlike a bare shell pip). Pinned to the version this notebook was last run with.\n",
        "%pip install -q optuna==3.1.0"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "OYaew9gYmkj1"
      },
      "source": [
        "## 2) Import required libraries"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 91,
      "metadata": {
        "id": "SG7iLs0Zmkj1"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import seaborn as sns\n",
        "import numpy as np\n",
        "from statistics import mean\n",
        "import matplotlib.pyplot as plt\n",
        "import warnings\n",
        "from sklearn.preprocessing import PowerTransformer\n",
        "import numpy as np\n",
        "from sklearn.preprocessing import LabelEncoder\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.utils import resample\n",
        "\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay, \\\n",
        "                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve,confusion_matrix\n",
        "\n",
        "\n",
        "from sklearn import metrics \n",
        "from sklearn.model_selection import  train_test_split, RepeatedStratifiedKFold, cross_val_score\n",
        "from sklearn.preprocessing import OneHotEncoder, MinMaxScaler\n",
        "from sklearn.compose import ColumnTransformer\n",
        "from sklearn.impute import SimpleImputer, KNNImputer\n",
        "from xgboost import XGBClassifier\n",
        "from sklearn.preprocessing import StandardScaler, MinMaxScaler,RobustScaler\n",
        "from sklearn.compose import ColumnTransformer\n",
        "\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Do4oeoyNmkj3"
      },
      "source": [
        "### Read Data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 92,
      "metadata": {
        "id": "W_8SVbfwmkj3"
      },
      "outputs": [],
      "source": [
        "# Load the dataset from the Excel workbook (the string \"na\" is read as missing)\n",
        "df3 = pd.read_excel('Phase_data.xlsx', na_values=\"na\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 93,
      "metadata": {
        "id": "miijRwgmmkj4",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "43aaa978-e5c8-45a2-b5d5-d9f20347b634"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(1200, 36)"
            ]
          },
          "metadata": {},
          "execution_count": 93
        }
      ],
      "source": [
        "# check rows and columns of the dataset\n",
        "df3.shape"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 94,
      "metadata": {
        "id": "ANWg7y2hmkj4",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 235
        },
        "outputId": "56e30444-dee7-403a-87fd-78c0d9f5e619"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      Al  Co  Cr  Fe  Ni  Cu  Mn     Ti      V     Nb  ...  Pt   Y  Pd  Au  \\\n",
              "0  0.111 NaN NaN NaN NaN NaN NaN  0.222  0.222  0.222  ... NaN NaN NaN NaN   \n",
              "1  0.158 NaN NaN NaN NaN NaN NaN  0.215  0.215  0.215  ... NaN NaN NaN NaN   \n",
              "2  0.588 NaN NaN NaN NaN NaN NaN  0.235  0.235  0.235  ... NaN NaN NaN NaN   \n",
              "3  0.588 NaN NaN NaN NaN NaN NaN  0.235  0.235  0.235  ... NaN NaN NaN NaN   \n",
              "4  0.476 NaN NaN NaN NaN NaN NaN  0.239  0.239    NaN  ... NaN NaN NaN NaN   \n",
              "\n",
              "   dHmix   dSmix      δ     ᐃχ    VEC  Phases  \n",
              "0 -8.395  13.146  3.738  0.050  4.556     BCC  \n",
              "1 -9.352  13.333  3.863  0.233  4.684     BCC  \n",
              "2 -4.042  12.708  4.003  0.243  4.882     BCC  \n",
              "3 -4.817  12.708  3.832  0.050  4.647     BCC  \n",
              "4 -3.356  12.569  4.018  0.244  4.905     BCC  \n",
              "\n",
              "[5 rows x 36 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-4cb0d41e-aa48-44a3-8776-f8eb518d78d7\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Al</th>\n",
              "      <th>Co</th>\n",
              "      <th>Cr</th>\n",
              "      <th>Fe</th>\n",
              "      <th>Ni</th>\n",
              "      <th>Cu</th>\n",
              "      <th>Mn</th>\n",
              "      <th>Ti</th>\n",
              "      <th>V</th>\n",
              "      <th>Nb</th>\n",
              "      <th>...</th>\n",
              "      <th>Pt</th>\n",
              "      <th>Y</th>\n",
              "      <th>Pd</th>\n",
              "      <th>Au</th>\n",
              "      <th>dHmix</th>\n",
              "      <th>dSmix</th>\n",
              "      <th>δ</th>\n",
              "      <th>ᐃχ</th>\n",
              "      <th>VEC</th>\n",
              "      <th>Phases</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0.111</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0.222</td>\n",
              "      <td>0.222</td>\n",
              "      <td>0.222</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>-8.395</td>\n",
              "      <td>13.146</td>\n",
              "      <td>3.738</td>\n",
              "      <td>0.050</td>\n",
              "      <td>4.556</td>\n",
              "      <td>BCC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>0.158</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0.215</td>\n",
              "      <td>0.215</td>\n",
              "      <td>0.215</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>-9.352</td>\n",
              "      <td>13.333</td>\n",
              "      <td>3.863</td>\n",
              "      <td>0.233</td>\n",
              "      <td>4.684</td>\n",
              "      <td>BCC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>0.588</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0.235</td>\n",
              "      <td>0.235</td>\n",
              "      <td>0.235</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>-4.042</td>\n",
              "      <td>12.708</td>\n",
              "      <td>4.003</td>\n",
              "      <td>0.243</td>\n",
              "      <td>4.882</td>\n",
              "      <td>BCC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0.588</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0.235</td>\n",
              "      <td>0.235</td>\n",
              "      <td>0.235</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>-4.817</td>\n",
              "      <td>12.708</td>\n",
              "      <td>3.832</td>\n",
              "      <td>0.050</td>\n",
              "      <td>4.647</td>\n",
              "      <td>BCC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0.476</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0.239</td>\n",
              "      <td>0.239</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>-3.356</td>\n",
              "      <td>12.569</td>\n",
              "      <td>4.018</td>\n",
              "      <td>0.244</td>\n",
              "      <td>4.905</td>\n",
              "      <td>BCC</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 36 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4cb0d41e-aa48-44a3-8776-f8eb518d78d7')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-4cb0d41e-aa48-44a3-8776-f8eb518d78d7 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-4cb0d41e-aa48-44a3-8776-f8eb518d78d7');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 94
        }
      ],
      "source": [
        "df3.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 95,
      "metadata": {
        "id": "EzuTamjymkj4",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "facc1465-c753-4c2c-8e9e-bf7dffc3097c"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "MIP        441\n",
              "BCC        372\n",
              "FCC        220\n",
              "FCC_BCC    167\n",
              "Name: Phases, dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 95
        }
      ],
      "source": [
        "# Check unique values of target variable\n",
        "df3['Phases'].value_counts()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 96,
      "metadata": {
        "id": "jo3vqA8wmkj5",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5d51b864-c0ad-4e32-c98a-0f06aec1dc8b"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Al        float64\n",
              "Co        float64\n",
              "Cr        float64\n",
              "Fe        float64\n",
              "Ni        float64\n",
              "Cu        float64\n",
              "Mn        float64\n",
              "Ti        float64\n",
              "V         float64\n",
              "Nb        float64\n",
              "Mo        float64\n",
              "Zr        float64\n",
              "Hf        float64\n",
              "Ta        float64\n",
              "W         float64\n",
              "C         float64\n",
              "Mg        float64\n",
              "Zn        float64\n",
              "Si        float64\n",
              "Re        float64\n",
              "N         float64\n",
              "Li        float64\n",
              "Sn        float64\n",
              "Be        float64\n",
              "B         float64\n",
              "Ag        float64\n",
              "Pt        float64\n",
              "Y         float64\n",
              "Pd        float64\n",
              "Au        float64\n",
              "dHmix     float64\n",
              "dSmix     float64\n",
              "δ         float64\n",
              "ᐃχ        float64\n",
              "VEC       float64\n",
              "Phases     object\n",
              "dtype: object"
            ]
          },
          "metadata": {},
          "execution_count": 96
        }
      ],
      "source": [
        "df3.dtypes"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 97,
      "metadata": {
        "id": "6CuLGrulZQMF",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 386
        },
        "outputId": "94b822d8-2d86-417c-dd2a-7d9c425d6086"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MIP: 441, BCC: 372, FCC: 220, FCC_BCC : 167\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 360x360 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAS40lEQVR4nO3df7BndX3f8eeLXQgQRVS2lLA0S5TEEgaJWVFj82NwrCCpUAsJNsQ1oW6SMZlomlCathYbnTGKoRozTokYlvxQ8UciY43EQY2mY4iLLht+mHHVWmEQFkTUqCTAu398Pxdv1mX3y7rnvu/e+3zM3NlzPufcu5/vgXnes+ee77mpKiRJS++g7glI0mplgCWpiQGWpCYGWJKaGGBJarK2ewLfidNPP73e//73d09DkvYmuxs8oM+A77rrru4pSNI+O6ADLEkHMgMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSkwP6cZTSUrvwj7Z3T2FJveb8k7unsKJ5BixJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1GTyACdZk+STSd471o9Pcl2SHUnenuSQMf5dY33H2L5h6rlJUqelOAP+VeCWReu/DVxaVU8E7gEuGOMXAPeM8UvHfpK0Yk0a4CTrgTOBN4/1AKcB7xy7bAHOHstnjXXG9meN/SVpRZr6DPh/AhcCD471xwNfrqr7x/qtwLFj+VjgCwBj+71j/38iyeYkW5Ns3blz55Rzl6RJTRbgJD8J3FlV1+/Pr1tVl1XVxqrauG7duv35pSVpSa2d8Gs/E3hekucChwJHAK8HjkyydpzlrgduG/vfBhwH3JpkLfAY4O4J5ydJrSY7A66q/1xV66tqA3Ae8MGq+hngQ8A5Y7dNwHvG8tVjnbH9g1VVU81Pkrp13Af8n4BfS7KD2TXey8f45cDjx/ivARc1zE2SlsyUlyAeUlUfBj48lj8LnLqbfb4JnLsU85Gk5cB3wklSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSkyV5GlqXC7d/oHsKS+Y1Jz+7ewqSHiHPgCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkppMFuAkhyb5myQ3JLkpySvG+PFJrkuyI8nbkxwyxr9rrO8Y2zdMNTdJWg6mPAO+Dzitqp4MnAKcnuTpwG8Dl1bVE4F7gAvG/hcA94zxS8d+krRiTRbgmvnaWD14fBRwGvDOMb4FOHssnzXWGduflSRTzU+Suk16DTjJmiTbgDuBDwCfAb5cVfePXW4Fjh3LxwJfABjb7wUev5uvuTnJ1iRbd+7cOeX0JWlSkwa4qh6oqlOA9cCpwJP2w9e8rKo2VtXGdevWfcdzlKQuS3IXRFV9GfgQ8AzgyCRrx6b1wG1j+TbgOICx/THA3UsxP0nqMOVdEOuSHDmWDwOeDdzCLMTnjN02Ae8Zy1ePdcb2D1ZVTTU/Seq2du+77LNjgC1J1jAL/VVV9d4kNwNvS/JK4JPA5WP/y4E/TLID+BJw3oRzk6R2kwW4qrYDP7Sb8c8yux686/g3gXOnmo8kLTe+E06SmhhgSWpigCWpiQGWpCYGWJKaGGBJam
KAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKazBXgJNfOMyZJmt/aPW1McihwOHBUkscCGZuOAI6deG6StKLtMcDALwAvBb4HuJ5vBfgrwBsnnJckrXh7DHBVvR54fZJfqarfXaI5SdKqsLczYACq6neT/AiwYfHnVNWVE81Lkla8uQKc5A+BJwDbgAfGcAEGWJL20VwBBjYCJ1ZVTTkZSVpN5r0P+Ebgn085EUlabeY9Az4KuDnJ3wD3LQxW1fMmmZUkrQLzBvjiKSchSavRvHdB/OXUE5Gk1WbeuyC+yuyuB4BDgIOBv6+qI6aamCStdPOeAT96YTlJgLOAp081KUlaDR7x09Bq5s+A50wwH0laNea9BPH8RasHMbsv+JuTzEiSVol574L4N4uW7wf+L7PLEJKkfTTvNeCfm3oikrTazPtA9vVJ/jTJnePjXUnWTz05SVrJ5r0E8QfAnwDnjvXzx9izp5iUltavX/NX3VNYUpc85191T0EC5r8LYl1V/UFV3T8+rgDWTTgvSVrx5g3w3UnOT7JmfJwP3D3lxCRppZs3wD8P/BTwReB24BzgRRPNSZJWhXmvAf8PYFNV3QOQ5HHAJczCLEnaB/OeAZ+8EF+AqvoS8EPTTEmSVod5A3zQ+LX0wENnwPOePUuSdmPeiL4O+FiSd4z1c4FXTTMlSVod5n0n3JVJtgKnjaHnV9XN001Lkla+uS8jjOAaXUnaTx7x4yjnleS4JB9KcnOSm5L86hh/XJIPJPn0+POxYzxJ3pBkR5LtSZ4y1dwkaTmYLMDMnpr2H6vqRGYPb39JkhOBi4Brq+oE4NqxDnAGcML42Ay8acK5SVK7yQJcVbdX1SfG8leBW4BjmT3GcsvYbQtw9lg+C7hyPPD9r4Ejkxwz1fwkqduUZ8APSbKB2X3D1wFHV9XtY9MXgaPH8rHAFxZ92q1jTJJWpMkDnORRwLuAl1bVVxZvq6riW7/sc96vtznJ1iRbd+7cuR9nKklLa9IAJzmYWXz/uKrePYbvWLi0MP68c4zfBhy36NPXj7F/oqouq6qNVbVx3TofyCbpwDXlXRABLgduqarfWbTpamDTWN4EvGfR+AvH3RBPB+5ddKlCklacKd9O/EzgZ4G/TbJtjP0m8GrgqiQXAJ9n9pQ1gPcBzwV2AF8H/DVIkla0yQJcVX8F5GE2P2s3+xfwkqnmI0nLzZLcBSFJ+nYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJanJlL8RQ9IqddGln+uewpJ69cuO36fP8wxYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmhhgSWpigCWpiQGWpCYGWJKaGGBJamKAJamJAZakJgZYkpoYYElqYoAlqYkBlqQmBliSmkwW4CRvSXJnkhsXjT0uyQeSfHr8+dgxniRvSLIjyfYkT5lqXpK0XEx5BnwFcPouYxcB11bVCcC1Yx3gDOCE8bEZeNOE85KkZWGyAFfVR4Av7TJ8FrBlLG8Bzl40fmXN/DVwZJJjppqbJC0HS30N+Oiqun0sfxE4eiwfC3xh0X63jrFvk2Rzkq1Jtu7cuXO6mUrSxNp+CFdVBdQ+fN5lVbWxqjauW7dugplJ0tJY6gDfsXBpYfx55xi/DThu0X7rx5gkrVhLHeCrgU1jeRPwnkXjLxx3QzwduHfRpQpJWpHWTvWFk7wV+AngqCS3Av8deDVwVZILgM8DPzV2fx/wXGAH8H
Xg56aalyQtF5MFuKpe8DCbnrWbfQt4yVRzkaTlyHfCSVITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTQywJDUxwJLUxABLUhMDLElNDLAkNTHAktTEAEtSk2UV4CSnJ/m7JDuSXNQ9H0ma0rIJcJI1wO8BZwAnAi9IcmLvrCRpOssmwMCpwI6q+mxV/QPwNuCs5jlJ0mRSVd1zACDJOcDpVfUfxvrPAk+rql/eZb/NwOax+gPA3y3pRPfuKOCu7kkcIDxW8/NYzWe5Hqe7qur0XQfXdszkO1FVlwGXdc/j4STZWlUbu+dxIPBYzc9jNZ8D7Tgtp0sQtwHHLVpfP8YkaUVaTgH+OHBCkuOTHAKcB1zdPCdJmsyyuQRRVfcn+WXgGmAN8Jaquql5Wvti2V4eWYY8VvPzWM3ngDpOy+aHcJK02iynSxCStKoYYElqYoAfgSQPJNmW5IYkn0jyI4u2nZrkI+Ot1J9M8uYkh49tZyTZmuTmse11fa9iaSw6VgsfG8a4x2k3klSSP1q0vjbJziTvHesvSvLGsXxxktvGcb0xyfO65q3vUFX5MecH8LVFy88B/nIsHw18HnjGou3njPGTgM8ATxrja4Bf6n4tS3msFo15nPZwvIBtwGFj/Yyx/t6x/iLgjWP5YuDXx/K/ZPbGg4O6X8M+vOYHxmtc+Ngwxk8FPsLsTVafBN4MHL7ouGwFbh7bXreHr38xs1tZtwGfAt60cJyAg4FXA58GPgF8DDhjbHsU8L/G/4/XAx9m9qaw/X4Mls1dEAegI4B7xvJLgC1V9bGFjVX1ToAkrwVeVVWfGuMPMPsfYTXyOO3Z+4AzgXcCLwDeCvzonj6hqm5Jcj+zd4DdOfkM969vVNUpiweSHA28Azhv4f+T8S7ZRyf5PuCNwJlV9anx/JjNu37RXVxaVZckOYhZ1H8c+BDwW8AxwElVdd/4e398fM6bgc8BJ1TVg0mOZ/Z8mv3OSxCPzGHjn32fYvYf6bfG+EnMvlPuzp62rWQLx2pbkj8dYx6nPXsbcF6SQ4GTgev29glJngY8COyceG5LZbffpKvqDuBCdvkmXVXzfpM+BDgUuGdc8nox8CtVdd/4WndU1VVJngA8DfivVfXg2Pa5qvrf++sFLuYZ8CPz0HfsJM8ArkxyUvOclqtvO7vRnlXV9nGt/AXMzob35GVJzge+Cvx0jX87H2AOS7JtLH+uqv4ts2/EWx5m/5OAR/pzgYXj9L3An1fVtiQnA/+vqr6ym/1/ENg2/gU2Oc+A99H4Dn0UsA64Cfjhh9l1T9tWG4/T3l0NXMLs8sOeXFpVp1TVj1bVR5dgXlP4xngNp4z4TuHScSLwz4DvTnLeRH/PPjHA+yjJk5j9oOhuZtelNo1/Di5sf/64rvRa4DeTfP8YPyjJL3bMeRnwOO3dW4BXVNXfdk+kySTfpKvqH4H3Az8G7AD+RZIjHubvePK4vjw5A/zIPHRdE3g7sGlch7qD2bMrLhm3V93C7C6Jr1bVduClwFvH+I3A93W9gE4ep72rqlur6g3d82g0yTfpJAGeCXymqr4OXA68fjx3hiTrkpxbVZ9hdpfFK8bnkGRDkjP342v81rwOzEtHkg50Sb5WVY/azfgzgNcwu2zwILO7F15WVV9P8pPAK4DDgWJ2m96FD/P1L2b2w7adzG472w78fFV9Y4T3lcDzgW8Cfw+8vKquGWfGrwNOA77B7Da/36iqj++3F78wRwMsST28BCFJTbwNTdIBLc
l/Ac7dZfgdVfWqjvk8El6CkKQmXoKQpCYGWJKaGGCtKIseg3ljknckOXzcx3lj99ykXRlgrTQLb289CfgHYDW+m04HCAOsleyjwBPH8pokv5/kpiR/keQwgCQvTvLx8ZD9dy16OPy54yz6hiQfGWNrkrx27L89yS+M8WPGQ+YXzrz3+AhJaYEB1oqUZC2zh3cvPFPhBOD3quoHgS8D/26Mv7uqnlpVTwZuAS4Y4y8HnjPGF37jxAXAvVX1VOCpwIvHs2L/PXDNeOjLk5k9AFzaK+8D1kqz+BGHH2X2nv/vYfa4w4Xx64ENY/mkJK8EjmT2mxCuGeP/B7giyVXAu8fYvwZOHg8IB3gMs7B/HHhLkoOBP1v090h7ZIC10uzutywA3Ldo6AHgsLF8BXB2Vd2Q5EXATwBU1S+OB8KcCVyf5IeBMHuI9zXsIsmPjX2vSPI7VXXl/nxRWpm8BKHV7tHA7ePs9WcWBpM8oaquq6qXM3uYy3HMzo5/aexLku9P8t1Jvhe4o6p+n9lvSnnKkr8KHZA8A9Zq99+Y/eqfnePPR4/x1yY5gdlZ77XADcyeprUB+MR4VOFO4GxmZ82/keQfmf1yzRcu4fx1APOtyJLUxEsQktTEAEtSEwMsSU0MsCQ1McCS1MQAS1ITAyxJTf4/2RP10kKVt8MAAAAASUVORK5CYII=\n"
          },
          "metadata": {
            "needs_background": "light"
          }
        }
      ],
      "source": [
        "## Visualization of unique values in Target variable\n",
        "# Tally the rows carrying each phase label (boolean mask, then sum).\n",
        "MIP = int((df3['Phases'] == 'MIP').sum())\n",
        "BCC = int((df3['Phases'] == 'BCC').sum())\n",
        "FCC = int((df3['Phases'] == 'FCC').sum())\n",
        "FCC_BCC = int((df3['Phases'] == 'FCC_BCC').sum())\n",
        "print(f\"MIP: {MIP}, BCC: {BCC}, FCC: {FCC}, FCC_BCC : {FCC_BCC}\")\n",
        "# Count plot of the same column, one bar per phase label.\n",
        "sns.catplot(data=df3, x=\"Phases\", kind=\"count\", palette=\"winter_r\", alpha=.6)\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 98,
      "metadata": {
        "id": "HHD7hYOzmkj5",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b7170100-a907-478f-ab53-8787caf3cbad"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "We have 35 numerical features : ['Al', 'Co', 'Cr', 'Fe', 'Ni', 'Cu', 'Mn', 'Ti', 'V', 'Nb', 'Mo', 'Zr', 'Hf', 'Ta', 'W', 'C', 'Mg', 'Zn', 'Si', 'Re', 'N', 'Li', 'Sn', 'Be', 'B', 'Ag', 'Pt', 'Y', 'Pd', 'Au', 'dHmix', 'dSmix', 'δ', 'ᐃχ', 'VEC']\n",
            "\n",
            "We have 1 categorical features : ['Phases']\n"
          ]
        }
      ],
      "source": [
        "# Partition the columns by dtype: object-dtype columns are treated as\n",
        "# categorical, everything else as numerical.\n",
        "numeric_features, categorical_features = [], []\n",
        "for col_name in df3.columns:\n",
        "    if df3[col_name].dtype == 'O':\n",
        "        categorical_features.append(col_name)\n",
        "    else:\n",
        "        numeric_features.append(col_name)\n",
        "\n",
        "# Report how many columns fell into each group, and which ones.\n",
        "print(f'We have {len(numeric_features)} numerical features : {numeric_features}')\n",
        "print(f'\\nWe have {len(categorical_features)} categorical features : {categorical_features}')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_GxWn4Ukmkj6"
      },
      "source": [
        "### Checking missing values"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 99,
      "metadata": {
        "id": "Bqb2WLli8aio"
      },
      "outputs": [],
      "source": [
        "feature_names =  ['Al', 'Co', 'Cr', 'Fe', 'Ni', 'Cu', 'Mn', 'Ti', 'V', 'Nb', 'Mo', 'Zr',\n",
        "       'Hf', 'Ta', 'W', 'C', 'Mg', 'Zn', 'Si', 'Re', 'N', 'Li', 'Sn', 'Be',\n",
        "       'B', 'Ag', 'Pt', 'Y', 'Pd', 'Au', 'dHmix', 'dSmix', 'δ', 'ᐃχ', 'VEC', 'Phases']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 100,
      "metadata": {
        "id": "gana74hF9F6V"
      },
      "outputs": [],
      "source": [
        "# Encode the phase labels as integers (MIP=0, BCC=1, FCC=2, FCC_BCC=3).\n",
        "# The result is assigned back rather than using replace(..., inplace=True) on the\n",
        "# selected column: that chained in-place call is not guaranteed to modify df3\n",
        "# (it does not under pandas Copy-on-Write).\n",
        "df3['Phases'] = df3['Phases'].replace({'MIP': 0, 'BCC': 1, 'FCC': 2, 'FCC_BCC': 3})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 101,
      "metadata": {
        "id": "-Mm5xHNY8Eze",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 487
        },
        "outputId": "c490fd90-2d54-48fc-bc4f-97ce2343e63f"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "            Al        Co        Cr        Fe        Ni        Cu        Mn  \\\n",
              "0     0.111000  0.232667  0.227333  0.227333  0.227333  0.186000  0.242333   \n",
              "1     0.158000  0.205333  0.216333  0.254000  0.197000  0.209333  0.242333   \n",
              "2     0.588000  0.188667  0.257333  0.278333  0.218333  0.511000  0.216000   \n",
              "3     0.588000  0.226667  0.257333  0.278333  0.234667  0.480667  0.255333   \n",
              "4     0.476000  0.166667  0.248000  0.264667  0.185333  0.489000  0.216667   \n",
              "...        ...       ...       ...       ...       ...       ...       ...   \n",
              "1195  0.143000  0.143000  0.143000  0.143000  0.143000  0.143000  0.560667   \n",
              "1196  0.185333  0.167000  0.167000  0.167000  0.167000  0.453667  0.167000   \n",
              "1197  0.182333  0.167000  0.167000  0.167000  0.167000  0.223667  0.167000   \n",
              "1198  0.390000  0.143000  0.143000  0.143000  0.143000  0.143000  0.159000   \n",
              "1199  0.287333  0.200000  0.199333  0.200000  0.400000  0.297000  0.200000   \n",
              "\n",
              "            Ti         V        Nb  ...   Pt         Y   Pd     Au   dHmix  \\\n",
              "0     0.222000  0.222000  0.222000  ...  0.2  0.174667  0.3  0.167  -8.395   \n",
              "1     0.215000  0.215000  0.215000  ...  0.2  0.174667  0.3  0.167  -9.352   \n",
              "2     0.235000  0.235000  0.235000  ...  0.2  0.174667  0.3  0.167  -4.042   \n",
              "3     0.235000  0.235000  0.235000  ...  0.2  0.174667  0.3  0.167  -4.817   \n",
              "4     0.239000  0.239000  0.232000  ...  0.2  0.174667  0.3  0.167  -3.356   \n",
              "...        ...       ...       ...  ...  ...       ...  ...    ...     ...   \n",
              "1195  0.668000  0.566667  0.133333  ...  0.2  0.128333  0.3  0.167 -18.857   \n",
              "1196  0.140667  0.430333  0.167000  ...  0.2  0.174667  0.3  0.167 -12.000   \n",
              "1197  0.167000  0.711667  0.291000  ...  0.2  0.147333  0.3  0.167 -13.444   \n",
              "1198  0.286000  0.711667  0.291000  ...  0.2  0.147333  0.3  0.167 -14.041   \n",
              "1199  0.433000  0.103667  0.246333  ...  0.2  0.174667  0.3  0.167  -4.160   \n",
              "\n",
              "       dSmix      δ      ᐃχ    VEC  Phases  \n",
              "0     13.146  3.738  0.0500  4.556     1.0  \n",
              "1     13.333  3.863  0.2330  4.684     1.0  \n",
              "2     12.708  4.003  0.2430  4.882     1.0  \n",
              "3     12.708  3.832  0.0500  4.647     1.0  \n",
              "4     12.569  4.018  0.2440  4.905     1.0  \n",
              "...      ...    ...     ...    ...     ...  \n",
              "1195  16.175  6.126  0.1160  7.286     0.0  \n",
              "1196  14.894  5.495  0.1410  7.500     0.0  \n",
              "1197  14.894  6.293  0.1520  7.333     0.0  \n",
              "1198  14.529  7.241  0.1550  7.429     0.0  \n",
              "1199  11.070  3.290  0.1361  8.800     2.0  \n",
              "\n",
              "[1200 rows x 36 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-44bc5330-b5aa-430c-90df-1efea897f2a3\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Al</th>\n",
              "      <th>Co</th>\n",
              "      <th>Cr</th>\n",
              "      <th>Fe</th>\n",
              "      <th>Ni</th>\n",
              "      <th>Cu</th>\n",
              "      <th>Mn</th>\n",
              "      <th>Ti</th>\n",
              "      <th>V</th>\n",
              "      <th>Nb</th>\n",
              "      <th>...</th>\n",
              "      <th>Pt</th>\n",
              "      <th>Y</th>\n",
              "      <th>Pd</th>\n",
              "      <th>Au</th>\n",
              "      <th>dHmix</th>\n",
              "      <th>dSmix</th>\n",
              "      <th>δ</th>\n",
              "      <th>ᐃχ</th>\n",
              "      <th>VEC</th>\n",
              "      <th>Phases</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0.111000</td>\n",
              "      <td>0.232667</td>\n",
              "      <td>0.227333</td>\n",
              "      <td>0.227333</td>\n",
              "      <td>0.227333</td>\n",
              "      <td>0.186000</td>\n",
              "      <td>0.242333</td>\n",
              "      <td>0.222000</td>\n",
              "      <td>0.222000</td>\n",
              "      <td>0.222000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-8.395</td>\n",
              "      <td>13.146</td>\n",
              "      <td>3.738</td>\n",
              "      <td>0.0500</td>\n",
              "      <td>4.556</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>0.158000</td>\n",
              "      <td>0.205333</td>\n",
              "      <td>0.216333</td>\n",
              "      <td>0.254000</td>\n",
              "      <td>0.197000</td>\n",
              "      <td>0.209333</td>\n",
              "      <td>0.242333</td>\n",
              "      <td>0.215000</td>\n",
              "      <td>0.215000</td>\n",
              "      <td>0.215000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-9.352</td>\n",
              "      <td>13.333</td>\n",
              "      <td>3.863</td>\n",
              "      <td>0.2330</td>\n",
              "      <td>4.684</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>0.588000</td>\n",
              "      <td>0.188667</td>\n",
              "      <td>0.257333</td>\n",
              "      <td>0.278333</td>\n",
              "      <td>0.218333</td>\n",
              "      <td>0.511000</td>\n",
              "      <td>0.216000</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-4.042</td>\n",
              "      <td>12.708</td>\n",
              "      <td>4.003</td>\n",
              "      <td>0.2430</td>\n",
              "      <td>4.882</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0.588000</td>\n",
              "      <td>0.226667</td>\n",
              "      <td>0.257333</td>\n",
              "      <td>0.278333</td>\n",
              "      <td>0.234667</td>\n",
              "      <td>0.480667</td>\n",
              "      <td>0.255333</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>0.235000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-4.817</td>\n",
              "      <td>12.708</td>\n",
              "      <td>3.832</td>\n",
              "      <td>0.0500</td>\n",
              "      <td>4.647</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0.476000</td>\n",
              "      <td>0.166667</td>\n",
              "      <td>0.248000</td>\n",
              "      <td>0.264667</td>\n",
              "      <td>0.185333</td>\n",
              "      <td>0.489000</td>\n",
              "      <td>0.216667</td>\n",
              "      <td>0.239000</td>\n",
              "      <td>0.239000</td>\n",
              "      <td>0.232000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-3.356</td>\n",
              "      <td>12.569</td>\n",
              "      <td>4.018</td>\n",
              "      <td>0.2440</td>\n",
              "      <td>4.905</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1195</th>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.560667</td>\n",
              "      <td>0.668000</td>\n",
              "      <td>0.566667</td>\n",
              "      <td>0.133333</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.128333</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-18.857</td>\n",
              "      <td>16.175</td>\n",
              "      <td>6.126</td>\n",
              "      <td>0.1160</td>\n",
              "      <td>7.286</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1196</th>\n",
              "      <td>0.185333</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.453667</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.140667</td>\n",
              "      <td>0.430333</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-12.000</td>\n",
              "      <td>14.894</td>\n",
              "      <td>5.495</td>\n",
              "      <td>0.1410</td>\n",
              "      <td>7.500</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1197</th>\n",
              "      <td>0.182333</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.223667</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.167000</td>\n",
              "      <td>0.711667</td>\n",
              "      <td>0.291000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.147333</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-13.444</td>\n",
              "      <td>14.894</td>\n",
              "      <td>6.293</td>\n",
              "      <td>0.1520</td>\n",
              "      <td>7.333</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1198</th>\n",
              "      <td>0.390000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.143000</td>\n",
              "      <td>0.159000</td>\n",
              "      <td>0.286000</td>\n",
              "      <td>0.711667</td>\n",
              "      <td>0.291000</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.147333</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-14.041</td>\n",
              "      <td>14.529</td>\n",
              "      <td>7.241</td>\n",
              "      <td>0.1550</td>\n",
              "      <td>7.429</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1199</th>\n",
              "      <td>0.287333</td>\n",
              "      <td>0.200000</td>\n",
              "      <td>0.199333</td>\n",
              "      <td>0.200000</td>\n",
              "      <td>0.400000</td>\n",
              "      <td>0.297000</td>\n",
              "      <td>0.200000</td>\n",
              "      <td>0.433000</td>\n",
              "      <td>0.103667</td>\n",
              "      <td>0.246333</td>\n",
              "      <td>...</td>\n",
              "      <td>0.2</td>\n",
              "      <td>0.174667</td>\n",
              "      <td>0.3</td>\n",
              "      <td>0.167</td>\n",
              "      <td>-4.160</td>\n",
              "      <td>11.070</td>\n",
              "      <td>3.290</td>\n",
              "      <td>0.1361</td>\n",
              "      <td>8.800</td>\n",
              "      <td>2.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>1200 rows × 36 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-44bc5330-b5aa-430c-90df-1efea897f2a3')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-44bc5330-b5aa-430c-90df-1efea897f2a3 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-44bc5330-b5aa-430c-90df-1efea897f2a3');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 101
        }
      ],
      "source": [
        "from sklearn.impute import KNNImputer\n",
        "from sklearn.pipeline import Pipeline\n",
        "imputer = KNNImputer(n_neighbors=3)\n",
        "df_imp = imputer.fit_transform(df3)\n",
        "df_afterimp =pd.DataFrame(df_imp, columns=feature_names[0:36])\n",
        "df_afterimp"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "yNVhMH0m_hu-"
      },
      "source": [
        "outlier removal after missing value imputation\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 102,
      "metadata": {
        "id": "HkRVz8WW_n_U"
      },
      "outputs": [],
      "source": [
        "df = df_afterimp.copy()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 103,
      "metadata": {
        "id": "mS450J8m_ux1"
      },
      "outputs": [],
      "source": [
        "def IQR_capping(df_afterimp, cols, factor):\n",
        "    \n",
        "    for col in cols:\n",
        "        Q1 = df_afterimp[col].quantile(0.10)\n",
        "        Q3 = df_afterimp[col].quantile(0.90)\n",
        "        IQR=Q3-Q1\n",
        "\n",
        "        lower_boundary = Q1-(factor*IQR)\n",
        "        upper_boundary = Q3+(factor*IQR)\n",
        "        \n",
        "\n",
        "        df_afterimp[col] = np.where(df_afterimp[col]>upper_boundary, upper_boundary, np.where(df_afterimp[col]<lower_boundary, lower_boundary, df_afterimp[col]))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 104,
      "metadata": {
        "id": "eS3oxy9-_v3r"
      },
      "outputs": [],
      "source": [
        "IQR_capping(df, feature_names, 1.5)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 105,
      "metadata": {
        "id": "CfPevh6E_y8s"
      },
      "outputs": [],
      "source": [
        "# for col in feature_names:\n",
        "#     plt.figure(figsize=(16,4))\n",
        "\n",
        "#     plt.subplot(141)\n",
        "#     sns.distplot(df_afterimp[col], label='skew: '+ str(np.round(df_afterimp[col].skew(),2)))\n",
        "#     plt.title('Before')\n",
        "#     plt.legend()\n",
        "\n",
        "#     plt.subplot(142)\n",
        "#     sns.distplot(df[col], label ='skew '+ str(np.round(df[col].skew(),2)))\n",
        "#     plt.title('After')\n",
        "#     plt.legend()\n",
        "\n",
        "#     plt.subplot(143)\n",
        "#     sns.boxplot(df_afterimp[col])\n",
        "#     plt.title('Before')\n",
        "\n",
        "#     plt.subplot(144)\n",
        "#     sns.boxplot(df[col])\n",
        "#     plt.title('After')\n",
        "#     plt.tight_layout()\n",
        "#     plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N5RYFNGjmkj9"
      },
      "source": [
        "## Create Functions for model training and evaluation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 106,
      "metadata": {
        "id": "snr8L-EVmkj9"
      },
      "outputs": [],
      "source": [
        "def evaluate_clf(true, predicted):\n",
        "    '''\n",
        "    This function takes in true values and predicted values\n",
        "    Returns: Accuracy, F1-Score, Precision, Recall\n",
        "    '''\n",
        "    acc = accuracy_score(true, predicted) # Calculate Accuracy\n",
        "    f1 = f1_score(true, predicted,average='weighted') # Calculate F1-score\n",
        "    precision = precision_score(true, predicted, average='weighted') # Calculate Precision\n",
        "    recall = recall_score(true, predicted, average='weighted')  # Calculate Recall\n",
        "   \n",
        "    return acc, f1 , precision, recall"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 107,
      "metadata": {
        "id": "qSI82B3xmkj-"
      },
      "outputs": [],
      "source": [
        "# Create a function which can evaluate models and return a report \n",
        "def evaluate_models(X, y, models):\n",
        "    '''\n",
        "    This function takes in X and y and models dictionary as input\n",
        "    It splits the data into Train Test split\n",
        "    Iterates through the given model dictionary and evaluates the metrics\n",
        "    Returns: Dataframe which contains report of all models metrics\n",
        "    '''\n",
        "    # separate dataset into train and test\n",
        "    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)\n",
        "    \n",
        "    cost_list=[]\n",
        "    models_list = []\n",
        "    accuracy_list = []\n",
        "    \n",
        "    for i in range(len(list(models))):\n",
        "        model = list(models.values())[i]\n",
        "        model.fit(X_train, y_train) # Train model\n",
        "\n",
        "        # Make predictions\n",
        "        y_train_pred = model.predict(X_train)\n",
        "        y_test_pred = model.predict(X_test)\n",
        "       \n",
        "\n",
        "        # Training set performance\n",
        "        model_train_accuracy, model_train_f1,model_train_precision,\\\n",
        "        model_train_recall=evaluate_clf(y_train ,y_train_pred)\n",
        "        \n",
        "\n",
        "        # Test set performance\n",
        "        model_test_accuracy,model_test_f1,model_test_precision,\\\n",
        "        model_test_recall=evaluate_clf(y_test, y_test_pred)\n",
        "\n",
        "       \n",
        "        print(list(models.keys())[i])\n",
        "        models_list.append(list(models.keys())[i])\n",
        "\n",
        "        print('Model performance for Training set')\n",
        "        print(\"- Accuracy: {:.4f}\".format(model_train_accuracy))\n",
        "        print('- F1 score: {:.4f}'.format(model_train_f1)) \n",
        "        print('- Precision: {:.4f}'.format(model_train_precision))\n",
        "        print('- Recall: {:.4f}'.format(model_train_recall))\n",
        "         \n",
        "       \n",
        "\n",
        "        print('----------------------------------')\n",
        "\n",
        "        print('Model performance for Test set')\n",
        "        print('- Accuracy: {:.4f}'.format(model_test_accuracy))\n",
        "        print('- F1 score: {:.4f}'.format(model_test_f1))\n",
        "        print('- Precision: {:.4f}'.format(model_test_precision))\n",
        "        print('- Recall: {:.4f}'.format(model_test_recall))\n",
        "       \n",
        "       \n",
        "        print('='*35)\n",
        "        print('\\n')\n",
        "        \n",
        "    report=pd.DataFrame(list(zip(models_list, cost_list)), columns=['Model Name', 'Cost']).sort_values(by=[\"Cost\"])\n",
        "        \n",
        "    return report"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5k1CaW9cmkj-"
      },
      "source": [
        "### Plot  distribution of all Independent Numerical variables"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 108,
      "metadata": {
        "id": "YvJ2TGK9mkj-"
      },
      "outputs": [],
      "source": [
        "# numeric_features = [feature for feature in df.columns if df[feature].dtype != 'O']\n",
        "\n",
        "# plt.figure(figsize=(15, 100))\n",
        "# for i, col in enumerate(numeric_features):\n",
        "#     plt.subplot(60, 3, i+1)\n",
        "#     sns.distplot(x=df[col], color='indianred')\n",
        "#     plt.xlabel(col, weight='bold')\n",
        "#     plt.tight_layout()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 109,
      "metadata": {
        "id": "xhftSkwu2TuH"
      },
      "outputs": [],
      "source": [
        "df2= df.copy()\n",
        "Al_trans = np.log(df2['Al'])\n",
        "Co_trans = np.log(df2['Co'])\n",
        "Cr_trans = np.log(df2['Cr'])\n",
        "Fe_trans = np.log(df2['Fe'])\n",
        "Ni_trans = np.log(df2['Ni'])\n",
        "Cu_trans = np.log(df2['Cu'])\n",
        "Mn_trans = np.log(df2['Mn'])\n",
        "Ti_trans = np.log(df2['Ti'])\n",
        "V_trans = np.log(df2['V'])\n",
        "Nb_trans = np.log(df2['Nb'])\n",
        "Mo_trans = np.log(df2['Mo'])\n",
        "Zr_trans = np.log(df2['Zr'])\n",
        "Hf_trans = np.log(df2['Hf'])\n",
        "Ta_trans = np.log(df2['Ta'])\n",
        "W_trans = np.log(df2['W'])\n",
        "C_trans = np.log(df2['C'])\n",
        "Mg_trans = np.log(df2['Mg'])\n",
        "Zn_trans = np.log(df2['Zn'])\n",
        "Si_trans = np.log(df2['Si'])\n",
        "Re_trans = np.log(df2['Re'])\n",
        "N_trans = np.log(df2['N'])\n",
        "Li_trans = np.log(df2['Li'])\n",
        "Sn_trans = np.log(df2['Sn'])\n",
        "Be_trans = np.log(df2['Be'])\n",
        "B_trans = np.log(df2['B'])\n",
        "Ag_trans = np.log(df2['Ag'])\n",
        "Pt_trans = np.log(df2['Pt'])\n",
        "Y_trans = np.log(df2['Y'])\n",
        "Pd_trans = np.log(df2['Pd'])\n",
        "Au_trans = np.log(df2['Au'])\n",
        "#dHmix_trans = np.log(df2['dHmix'])\n",
        "dSmix_trans = np.log(df2['dSmix']) \n",
        "Atom_Size_Diff_trans = np.log(df2['δ'])\n",
        "Elect_Diff_trans = np.log(df2['ᐃχ'])\n",
        "VEC_trans = np.log(df2['VEC'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 110,
      "metadata": {
        "id": "YqyteeKkBzb2",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 487
        },
        "outputId": "89a2481c-9788-4f1b-d8a0-b36ac8fb1782"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "            Al        Co        Cr        Fe        Ni        Cu        Mn  \\\n",
              "0    -2.198225 -1.458148 -1.481338 -1.481338 -1.481338 -1.682009 -1.417441   \n",
              "1    -1.845160 -1.583121 -1.530935 -1.370421 -1.624552 -1.563827 -1.417441   \n",
              "2    -0.531028 -1.667773 -1.357383 -1.278936 -1.521732 -0.671386 -1.532477   \n",
              "3    -0.531028 -1.484275 -1.357383 -1.278936 -1.449589 -0.732581 -1.365185   \n",
              "4    -0.742337 -1.791759 -1.394327 -1.329284 -1.685599 -0.715393 -1.529395   \n",
              "...        ...       ...       ...       ...       ...       ...       ...   \n",
              "1195 -1.944911 -1.944911 -1.944911 -1.944911 -1.944911 -1.944911 -0.578629   \n",
              "1196 -1.685599 -1.789761 -1.789761 -1.789761 -1.789761 -0.790393 -1.789761   \n",
              "1197 -1.701919 -1.789761 -1.789761 -1.789761 -1.789761 -1.497598 -1.789761   \n",
              "1198 -0.941609 -1.944911 -1.944911 -1.944911 -1.944911 -1.944911 -1.838851   \n",
              "1199 -1.247112 -1.609438 -1.612777 -1.609438 -0.916291 -1.214023 -1.609438   \n",
              "\n",
              "            Ti         V        Nb  ...        Pt         Y        Pd  \\\n",
              "0    -1.505078 -1.505078 -1.505078  ... -1.609438 -1.744876 -1.203973   \n",
              "1    -1.537117 -1.537117 -1.537117  ... -1.609438 -1.744876 -1.203973   \n",
              "2    -1.448170 -1.448170 -1.448170  ... -1.609438 -1.744876 -1.203973   \n",
              "3    -1.448170 -1.448170 -1.448170  ... -1.609438 -1.744876 -1.203973   \n",
              "4    -1.431292 -1.431292 -1.461018  ... -1.609438 -1.744876 -1.203973   \n",
              "...        ...       ...       ...  ...       ...       ...       ...   \n",
              "1195 -0.403467 -0.567984 -2.014903  ... -1.609438 -2.053124 -1.203973   \n",
              "1196 -1.961362 -0.843195 -1.789761  ... -1.609438 -1.744876 -1.203973   \n",
              "1197 -1.789761 -0.340146 -1.234432  ... -1.609438 -1.915058 -1.203973   \n",
              "1198 -1.251763 -0.340146 -1.234432  ... -1.609438 -1.915058 -1.203973   \n",
              "1199 -0.837018 -2.266575 -1.401070  ... -1.609438 -1.744876 -1.203973   \n",
              "\n",
              "            Au   dHmix     dSmix         δ        ᐃχ       VEC  Phases  \n",
              "0    -1.789761  -8.395  2.576118  1.318551 -2.995732  1.516445     1.0  \n",
              "1    -1.789761  -9.352  2.590242  1.351444 -1.456717  1.544152     1.0  \n",
              "2    -1.789761  -4.042  2.542232  1.387044 -1.414694  1.585555     1.0  \n",
              "3    -1.789761  -4.817  2.542232  1.343387 -2.995732  1.536222     1.0  \n",
              "4    -1.789761  -3.356  2.531233  1.390784 -1.410587  1.590255     1.0  \n",
              "...        ...     ...       ...       ...       ...       ...     ...  \n",
              "1195 -1.789761 -18.857  2.783467  1.812542 -2.154165  1.985955     0.0  \n",
              "1196 -1.789761 -12.000  2.700958  1.703839 -1.958995  2.014903     0.0  \n",
              "1197 -1.789761 -13.444  2.700958  1.839438 -1.883875  1.992385     0.0  \n",
              "1198 -1.789761 -14.041  2.676147  1.979759 -1.864330  2.005391     0.0  \n",
              "1199 -1.789761  -4.160  2.404239  1.190888 -1.994365  2.174752     2.0  \n",
              "\n",
              "[1200 rows x 36 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-47fedaa1-23bb-4a88-9ac7-094a8efe0bd5\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Al</th>\n",
              "      <th>Co</th>\n",
              "      <th>Cr</th>\n",
              "      <th>Fe</th>\n",
              "      <th>Ni</th>\n",
              "      <th>Cu</th>\n",
              "      <th>Mn</th>\n",
              "      <th>Ti</th>\n",
              "      <th>V</th>\n",
              "      <th>Nb</th>\n",
              "      <th>...</th>\n",
              "      <th>Pt</th>\n",
              "      <th>Y</th>\n",
              "      <th>Pd</th>\n",
              "      <th>Au</th>\n",
              "      <th>dHmix</th>\n",
              "      <th>dSmix</th>\n",
              "      <th>δ</th>\n",
              "      <th>ᐃχ</th>\n",
              "      <th>VEC</th>\n",
              "      <th>Phases</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>-2.198225</td>\n",
              "      <td>-1.458148</td>\n",
              "      <td>-1.481338</td>\n",
              "      <td>-1.481338</td>\n",
              "      <td>-1.481338</td>\n",
              "      <td>-1.682009</td>\n",
              "      <td>-1.417441</td>\n",
              "      <td>-1.505078</td>\n",
              "      <td>-1.505078</td>\n",
              "      <td>-1.505078</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-8.395</td>\n",
              "      <td>2.576118</td>\n",
              "      <td>1.318551</td>\n",
              "      <td>-2.995732</td>\n",
              "      <td>1.516445</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>-1.845160</td>\n",
              "      <td>-1.583121</td>\n",
              "      <td>-1.530935</td>\n",
              "      <td>-1.370421</td>\n",
              "      <td>-1.624552</td>\n",
              "      <td>-1.563827</td>\n",
              "      <td>-1.417441</td>\n",
              "      <td>-1.537117</td>\n",
              "      <td>-1.537117</td>\n",
              "      <td>-1.537117</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-9.352</td>\n",
              "      <td>2.590242</td>\n",
              "      <td>1.351444</td>\n",
              "      <td>-1.456717</td>\n",
              "      <td>1.544152</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>-0.531028</td>\n",
              "      <td>-1.667773</td>\n",
              "      <td>-1.357383</td>\n",
              "      <td>-1.278936</td>\n",
              "      <td>-1.521732</td>\n",
              "      <td>-0.671386</td>\n",
              "      <td>-1.532477</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-4.042</td>\n",
              "      <td>2.542232</td>\n",
              "      <td>1.387044</td>\n",
              "      <td>-1.414694</td>\n",
              "      <td>1.585555</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>-0.531028</td>\n",
              "      <td>-1.484275</td>\n",
              "      <td>-1.357383</td>\n",
              "      <td>-1.278936</td>\n",
              "      <td>-1.449589</td>\n",
              "      <td>-0.732581</td>\n",
              "      <td>-1.365185</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>-1.448170</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-4.817</td>\n",
              "      <td>2.542232</td>\n",
              "      <td>1.343387</td>\n",
              "      <td>-2.995732</td>\n",
              "      <td>1.536222</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>-0.742337</td>\n",
              "      <td>-1.791759</td>\n",
              "      <td>-1.394327</td>\n",
              "      <td>-1.329284</td>\n",
              "      <td>-1.685599</td>\n",
              "      <td>-0.715393</td>\n",
              "      <td>-1.529395</td>\n",
              "      <td>-1.431292</td>\n",
              "      <td>-1.431292</td>\n",
              "      <td>-1.461018</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-3.356</td>\n",
              "      <td>2.531233</td>\n",
              "      <td>1.390784</td>\n",
              "      <td>-1.410587</td>\n",
              "      <td>1.590255</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1195</th>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-0.578629</td>\n",
              "      <td>-0.403467</td>\n",
              "      <td>-0.567984</td>\n",
              "      <td>-2.014903</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-2.053124</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-18.857</td>\n",
              "      <td>2.783467</td>\n",
              "      <td>1.812542</td>\n",
              "      <td>-2.154165</td>\n",
              "      <td>1.985955</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1196</th>\n",
              "      <td>-1.685599</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-0.790393</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.961362</td>\n",
              "      <td>-0.843195</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-12.000</td>\n",
              "      <td>2.700958</td>\n",
              "      <td>1.703839</td>\n",
              "      <td>-1.958995</td>\n",
              "      <td>2.014903</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1197</th>\n",
              "      <td>-1.701919</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.497598</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-0.340146</td>\n",
              "      <td>-1.234432</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.915058</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-13.444</td>\n",
              "      <td>2.700958</td>\n",
              "      <td>1.839438</td>\n",
              "      <td>-1.883875</td>\n",
              "      <td>1.992385</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1198</th>\n",
              "      <td>-0.941609</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.944911</td>\n",
              "      <td>-1.838851</td>\n",
              "      <td>-1.251763</td>\n",
              "      <td>-0.340146</td>\n",
              "      <td>-1.234432</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.915058</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-14.041</td>\n",
              "      <td>2.676147</td>\n",
              "      <td>1.979759</td>\n",
              "      <td>-1.864330</td>\n",
              "      <td>2.005391</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1199</th>\n",
              "      <td>-1.247112</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.612777</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-0.916291</td>\n",
              "      <td>-1.214023</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-0.837018</td>\n",
              "      <td>-2.266575</td>\n",
              "      <td>-1.401070</td>\n",
              "      <td>...</td>\n",
              "      <td>-1.609438</td>\n",
              "      <td>-1.744876</td>\n",
              "      <td>-1.203973</td>\n",
              "      <td>-1.789761</td>\n",
              "      <td>-4.160</td>\n",
              "      <td>2.404239</td>\n",
              "      <td>1.190888</td>\n",
              "      <td>-1.994365</td>\n",
              "      <td>2.174752</td>\n",
              "      <td>2.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>1200 rows × 36 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-47fedaa1-23bb-4a88-9ac7-094a8efe0bd5')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-47fedaa1-23bb-4a88-9ac7-094a8efe0bd5 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-47fedaa1-23bb-4a88-9ac7-094a8efe0bd5');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 110
        }
      ],
      "source": [
        "# Assemble the modelling frame column by column; 'Phases' (the target) goes last.\n",
        "# dHmix is taken from df2 as-is, every other feature column is a *_trans object.\n",
        "final_columns = [\n",
        "    Al_trans, Co_trans, Cr_trans, Fe_trans, Ni_trans, Cu_trans, Mn_trans,\n",
        "    Ti_trans, V_trans, Nb_trans, Mo_trans, Zr_trans, Hf_trans, Ta_trans, W_trans,\n",
        "    C_trans, Mg_trans, Zn_trans, Si_trans, Re_trans, N_trans,\n",
        "    Li_trans, Sn_trans, Be_trans, B_trans, Ag_trans, Pt_trans, Y_trans, Pd_trans, Au_trans,\n",
        "    df2['dHmix'], dSmix_trans, Atom_Size_Diff_trans, Elect_Diff_trans, VEC_trans,\n",
        "    df2['Phases'],\n",
        "]\n",
        "df_final = pd.DataFrame(pd.concat(final_columns, axis=1))\n",
        "df_final"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "IcaPyF-cmkj-"
      },
      "source": [
        "# Evaluate models on different experiments"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 111,
      "metadata": {
        "id": "ZLnlk8lLmkj_"
      },
      "outputs": [],
      "source": [
        "# Feature matrix / target split shared by all experiments\n",
        "target_column = 'Phases'\n",
        "X = df_final.drop(columns=target_column)\n",
        "y = df_final[target_column]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 112,
      "metadata": {
        "id": "19sBusUMmkj_"
      },
      "outputs": [],
      "source": [
        "# Map phase labels to integer codes.\n",
        "# NOTE(review): the df_final preview shows Phases already numeric (0.0/1.0/2.0),\n",
        "# so this mapping may be a no-op here -- confirm against the earlier encoding step.\n",
        "phase_codes = {'MIP': 0, 'BCC': 1, 'FCC': 2, 'FCC_BCC': 3}\n",
        "y = y.replace(phase_codes)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 113,
      "metadata": {
        "id": "AGRi1lDdmkj_"
      },
      "outputs": [],
      "source": [
        "# Robust-scale the features for the KNN best-K selection experiment\n",
        "robustscaler = RobustScaler()\n",
        "robustscaler.fit(X)\n",
        "X1 = robustscaler.transform(X)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 114,
      "metadata": {
        "id": "K5UXXvl0mkj_"
      },
      "outputs": [],
      "source": [
        "# Compare KNNImputer neighbourhood sizes: for each candidate K, cross-validate a\n",
        "# LogisticRegression on the imputed X1 and keep the per-fold accuracies in `results`.\n",
        "results = []\n",
        "# NOTE(review): `imputer` is not used by the loop below (each pipeline builds its own\n",
        "# KNNImputer); kept only in case a later cell refers to it -- confirm and remove.\n",
        "imputer = KNNImputer(n_neighbors=5, weights='uniform', metric='nan_euclidean')\n",
        "strategies = [str(k) for k in (1, 3, 5, 7, 9)]\n",
        "for s in strategies:\n",
        "    pipeline = Pipeline(steps=[('i', KNNImputer(n_neighbors=int(s))), ('m', LogisticRegression())])\n",
        "    scores = cross_val_score(pipeline, X1, y, scoring='accuracy', cv=2, n_jobs=-1)\n",
        "    results.append(scores)\n",
        "    # Report the mean accuracy per candidate so the K chosen afterwards can be traced to this run;\n",
        "    # scores is the array returned by cross_val_score, so no separate `mean` import is needed.\n",
        "    print('n_neighbors= %s || accuracy (%.4f)' % (s, scores.mean()))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 115,
      "metadata": {
        "id": "Lc9ydKsbmkj_"
      },
      "outputs": [],
      "source": [
        "### Pipeline for KNN imputer\n",
        "num_features = X.select_dtypes(exclude='object').columns\n",
        "\n",
        "# Impute with the selected K (3 neighbours), then robust-scale\n",
        "knn_steps = [\n",
        "    ('imputer', KNNImputer(n_neighbors=3)),\n",
        "    ('RobustScaler', RobustScaler()),\n",
        "]\n",
        "knn_pipeline = Pipeline(steps=knn_steps)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 116,
      "metadata": {
        "id": "LNeqM7AqmkkA"
      },
      "outputs": [],
      "source": [
        "# Impute and scale X with knn_pipeline.\n",
        "# NOTE(review): the pipeline is fitted on all rows of X; if evaluate_models splits\n",
        "# train/test afterwards, held-out rows have influenced the imputer/scaler -- confirm.\n",
        "X_knn = knn_pipeline.fit_transform(X)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mJqwtYDWmkkB"
      },
      "source": [
        "### Initialize Default Models in a dictionary"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 117,
      "metadata": {
        "id": "nz1HT0QAmkkB"
      },
      "outputs": [],
      "source": [
        "# Default-configured classifiers compared in the experiment, keyed by display name\n",
        "models = dict([\n",
        "    ('Random Forest', RandomForestClassifier()),\n",
        "    ('Decision Tree', DecisionTreeClassifier()),\n",
        "    ('K-Neighbors Classifier', KNeighborsClassifier()),\n",
        "    ('XGBClassifier', XGBClassifier()),\n",
        "    ('SVM', SVC()),\n",
        "])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xy77_DslmkkB"
      },
      "source": [
        "### Fit the models in the dictionary on the KNN-imputed data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 118,
      "metadata": {
        "id": "6eJUzP0TmkkB",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e8d7d06a-9138-4f80-f387-71bd8425966e"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Random Forest\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8333\n",
            "- F1 score: 0.8340\n",
            "- Precision: 0.8443\n",
            "- Recall: 0.8333\n",
            "===================================\n",
            "\n",
            "\n",
            "Decision Tree\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7458\n",
            "- F1 score: 0.7463\n",
            "- Precision: 0.7510\n",
            "- Recall: 0.7458\n",
            "===================================\n",
            "\n",
            "\n",
            "K-Neighbors Classifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.7937\n",
            "- F1 score: 0.7927\n",
            "- Precision: 0.7933\n",
            "- Recall: 0.7937\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7042\n",
            "- F1 score: 0.7043\n",
            "- Precision: 0.7118\n",
            "- Recall: 0.7042\n",
            "===================================\n",
            "\n",
            "\n",
            "XGBClassifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.9458\n",
            "- F1 score: 0.9458\n",
            "- Precision: 0.9458\n",
            "- Recall: 0.9458\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8129\n",
            "- Precision: 0.8195\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "SVM\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.8031\n",
            "- F1 score: 0.8025\n",
            "- Precision: 0.8082\n",
            "- Recall: 0.8031\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7292\n",
            "- F1 score: 0.7286\n",
            "- Precision: 0.7373\n",
            "- Recall: 0.7292\n",
            "===================================\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Run evaluate_models over every entry of `models` on the KNN-imputed features\n",
        "report_knn = evaluate_models(\n",
        "    X_knn,\n",
        "    y,\n",
        "    models,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 119,
      "metadata": {
        "id": "Ox5_bybK3hiD"
      },
      "outputs": [],
      "source": [
        "# Independent copy of df_final, split into features and target\n",
        "full_data = df_final.copy()\n",
        "target_name = 'Phases'\n",
        "X_full = full_data.drop(columns=target_name)\n",
        "y_full = full_data[target_name]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uHzK-FYmmMw0"
      },
      "source": [
        "#### Hyperparameter tuning of the Random Forest Classifier (RFC) model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 121,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6VI3A2oWkBZk",
        "outputId": "5a3d2ad4-29e6-42ea-ab6c-b153568891b8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:29,064]\u001b[0m A new study created in memory with name: no-name-c52c9aae-e167-4dbb-bab5-9c0513120939\u001b[0m\n",
            "\u001b[32m[I 2023-02-19 13:12:29,388]\u001b[0m Trial 0 finished with value: 0.8125 and parameters: {'random_state': None, 'min_samples_split': 9, 'max_depth': 3, 'n_estimators': 720, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.8125.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.811998\n",
            "Model precision-score: 0.825244\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': None, ' criterion': 9, 'max_depth': 3, 'n_estimators': 720, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:29,703]\u001b[0m Trial 1 finished with value: 0.775 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 2, 'n_estimators': 889, 'min_samples_leaf': 1}. Best is trial 0 with value: 0.8125.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.775\n",
            "Model F1-score: 0.773533\n",
            "Model precision-score: 0.780962\n",
            "Model recall-score: 0.775\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 2, 'n_estimators': 889, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:30,025]\u001b[0m Trial 2 finished with value: 0.8083333333333333 and parameters: {'random_state': 0, 'min_samples_split': 5, 'max_depth': 6, 'n_estimators': 707, 'min_samples_leaf': 9}. Best is trial 0 with value: 0.8125.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.808333\n",
            "Model F1-score: 0.808474\n",
            "Model precision-score: 0.814575\n",
            "Model recall-score: 0.808333\n",
            "Model Parameters: {'random_state': 0, ' criterion': 5, 'max_depth': 6, 'n_estimators': 707, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:30,343]\u001b[0m Trial 3 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 9, 'n_estimators': 531, 'min_samples_leaf': 4}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853294\n",
            "Model precision-score: 0.856774\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 9, 'n_estimators': 531, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:30,661]\u001b[0m Trial 4 finished with value: 0.8166666666666667 and parameters: {'random_state': 0, 'min_samples_split': 8, 'max_depth': 4, 'n_estimators': 973, 'min_samples_leaf': 4}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.816667\n",
            "Model F1-score: 0.816438\n",
            "Model precision-score: 0.820973\n",
            "Model recall-score: 0.816667\n",
            "Model Parameters: {'random_state': 0, ' criterion': 8, 'max_depth': 4, 'n_estimators': 973, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:30,986]\u001b[0m Trial 5 finished with value: 0.8 and parameters: {'random_state': 25, 'min_samples_split': 8, 'max_depth': 30, 'n_estimators': 967, 'min_samples_leaf': 5}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799559\n",
            "Model precision-score: 0.804711\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 25, ' criterion': 8, 'max_depth': 30, 'n_estimators': 967, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:31,312]\u001b[0m Trial 6 finished with value: 0.825 and parameters: {'random_state': 0, 'min_samples_split': 1, 'max_depth': 2, 'n_estimators': 144, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.825\n",
            "Model F1-score: 0.824398\n",
            "Model precision-score: 0.833411\n",
            "Model recall-score: 0.825\n",
            "Model Parameters: {'random_state': 0, ' criterion': 1, 'max_depth': 2, 'n_estimators': 144, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:31,624]\u001b[0m Trial 7 finished with value: 0.8375 and parameters: {'random_state': None, 'min_samples_split': 5, 'max_depth': 7, 'n_estimators': 558, 'min_samples_leaf': 10}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.839794\n",
            "Model precision-score: 0.847995\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': None, ' criterion': 5, 'max_depth': 7, 'n_estimators': 558, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:32,011]\u001b[0m Trial 8 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 1, 'max_depth': 23, 'n_estimators': 939, 'min_samples_leaf': 6}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820193\n",
            "Model precision-score: 0.828595\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 1, 'max_depth': 23, 'n_estimators': 939, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:32,402]\u001b[0m Trial 9 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 3, 'max_depth': 3, 'n_estimators': 337, 'min_samples_leaf': 7}. Best is trial 3 with value: 0.8541666666666666.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.819996\n",
            "Model precision-score: 0.827913\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 3, 'max_depth': 3, 'n_estimators': 337, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:32,730]\u001b[0m Trial 10 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 14, 'n_estimators': 390, 'min_samples_leaf': 3}. Best is trial 10 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856748\n",
            "Model precision-score: 0.860595\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 14, 'n_estimators': 390, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:33,068]\u001b[0m Trial 11 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 14, 'n_estimators': 404, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861399\n",
            "Model precision-score: 0.864133\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 14, 'n_estimators': 404, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:33,394]\u001b[0m Trial 12 finished with value: 0.8333333333333334 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 15, 'n_estimators': 328, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.833333\n",
            "Model F1-score: 0.831911\n",
            "Model precision-score: 0.834447\n",
            "Model recall-score: 0.833333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 15, 'n_estimators': 328, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:33,733]\u001b[0m Trial 13 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 13, 'n_estimators': 321, 'min_samples_leaf': 1}. Best is trial 11 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.835845\n",
            "Model precision-score: 0.840884\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 13, 'n_estimators': 321, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:34,062]\u001b[0m Trial 14 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 16, 'n_estimators': 432, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853294\n",
            "Model precision-score: 0.855244\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 16, 'n_estimators': 432, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:34,396]\u001b[0m Trial 15 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 11, 'n_estimators': 140, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865519\n",
            "Model precision-score: 0.871066\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 11, 'n_estimators': 140, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:34,734]\u001b[0m Trial 16 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 10, 'n_estimators': 124, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.860762\n",
            "Model precision-score: 0.865519\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 10, 'n_estimators': 124, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:35,072]\u001b[0m Trial 17 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 22, 'n_estimators': 217, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856546\n",
            "Model precision-score: 0.861436\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 22, 'n_estimators': 217, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:35,403]\u001b[0m Trial 18 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 10, 'n_estimators': 229, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857023\n",
            "Model precision-score: 0.861469\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 10, 'n_estimators': 229, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:35,728]\u001b[0m Trial 19 finished with value: 0.8291666666666667 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 6, 'n_estimators': 500, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.831168\n",
            "Model precision-score: 0.843312\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 6, 'n_estimators': 500, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:36,071]\u001b[0m Trial 20 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 2, 'max_depth': 8, 'n_estimators': 623, 'min_samples_leaf': 6}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.79105\n",
            "Model precision-score: 0.80046\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 2, 'max_depth': 8, 'n_estimators': 623, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:36,403]\u001b[0m Trial 21 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 11, 'n_estimators': 144, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.86138\n",
            "Model precision-score: 0.863109\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 11, 'n_estimators': 144, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:36,727]\u001b[0m Trial 22 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 11, 'n_estimators': 112, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865922\n",
            "Model precision-score: 0.869616\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 11, 'n_estimators': 112, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:37,310]\u001b[0m Trial 23 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 17, 'n_estimators': 235, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.839851\n",
            "Model precision-score: 0.846678\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 17, 'n_estimators': 235, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:38,115]\u001b[0m Trial 24 finished with value: 0.8291666666666667 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 12, 'n_estimators': 110, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.828211\n",
            "Model precision-score: 0.82948\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 12, 'n_estimators': 110, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:39,588]\u001b[0m Trial 25 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 8, 'n_estimators': 253, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853473\n",
            "Model precision-score: 0.854738\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 8, 'n_estimators': 253, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:40,976]\u001b[0m Trial 26 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 18, 'n_estimators': 191, 'min_samples_leaf': 1}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857289\n",
            "Model precision-score: 0.859064\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 18, 'n_estimators': 191, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:41,829]\u001b[0m Trial 27 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 11, 'n_estimators': 437, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.843942\n",
            "Model precision-score: 0.849422\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 11, 'n_estimators': 437, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:42,813]\u001b[0m Trial 28 finished with value: 0.8083333333333333 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 13, 'n_estimators': 273, 'min_samples_leaf': 7}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.808333\n",
            "Model F1-score: 0.807855\n",
            "Model precision-score: 0.81107\n",
            "Model recall-score: 0.808333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 13, 'n_estimators': 273, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:43,586]\u001b[0m Trial 29 finished with value: 0.775 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 5, 'n_estimators': 784, 'min_samples_leaf': 8}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.775\n",
            "Model F1-score: 0.775804\n",
            "Model precision-score: 0.779594\n",
            "Model recall-score: 0.775\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 5, 'n_estimators': 784, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:44,124]\u001b[0m Trial 30 finished with value: 0.8416666666666667 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 180, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.841499\n",
            "Model precision-score: 0.848858\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 9, 'n_estimators': 180, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:44,881]\u001b[0m Trial 31 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 11, 'n_estimators': 112, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857525\n",
            "Model precision-score: 0.860761\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 11, 'n_estimators': 112, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:45,483]\u001b[0m Trial 32 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 9, 'n_estimators': 105, 'min_samples_leaf': 6}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.849291\n",
            "Model precision-score: 0.849981\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 9, 'n_estimators': 105, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:46,143]\u001b[0m Trial 33 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 13, 'n_estimators': 294, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852504\n",
            "Model precision-score: 0.856196\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 13, 'n_estimators': 294, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:46,812]\u001b[0m Trial 34 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 7, 'n_estimators': 378, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865235\n",
            "Model precision-score: 0.868544\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 7, 'n_estimators': 378, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:47,585]\u001b[0m Trial 35 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 7, 'n_estimators': 417, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857009\n",
            "Model precision-score: 0.859766\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 7, 'n_estimators': 417, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:48,124]\u001b[0m Trial 36 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 1, 'max_depth': 6, 'n_estimators': 489, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.7905\n",
            "Model precision-score: 0.799184\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 1, 'max_depth': 6, 'n_estimators': 489, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:48,801]\u001b[0m Trial 37 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 5, 'n_estimators': 375, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865691\n",
            "Model precision-score: 0.867711\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 5, 'n_estimators': 375, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:49,685]\u001b[0m Trial 38 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 5, 'n_estimators': 621, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861158\n",
            "Model precision-score: 0.866139\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 5, 'n_estimators': 621, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:50,515]\u001b[0m Trial 39 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 7, 'max_depth': 4, 'n_estimators': 368, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820078\n",
            "Model precision-score: 0.826391\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 7, 'max_depth': 4, 'n_estimators': 368, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:51,223]\u001b[0m Trial 40 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 8, 'n_estimators': 169, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.782262\n",
            "Model precision-score: 0.787484\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 8, 'n_estimators': 169, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:52,793]\u001b[0m Trial 41 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 7, 'n_estimators': 573, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.84829\n",
            "Model precision-score: 0.851272\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 7, 'n_estimators': 573, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:54,291]\u001b[0m Trial 42 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 15, 'n_estimators': 472, 'min_samples_leaf': 5}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857142\n",
            "Model precision-score: 0.859712\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 15, 'n_estimators': 472, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:55,548]\u001b[0m Trial 43 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 19, 'n_estimators': 389, 'min_samples_leaf': 4}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856835\n",
            "Model precision-score: 0.861504\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 19, 'n_estimators': 389, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:56,301]\u001b[0m Trial 44 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 14, 'n_estimators': 292, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865368\n",
            "Model precision-score: 0.868412\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 14, 'n_estimators': 292, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:57,063]\u001b[0m Trial 45 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 1, 'max_depth': 10, 'n_estimators': 288, 'min_samples_leaf': 1}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820728\n",
            "Model precision-score: 0.828071\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 1, 'max_depth': 10, 'n_estimators': 288, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:58,051]\u001b[0m Trial 46 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 12, 'n_estimators': 198, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861063\n",
            "Model precision-score: 0.865844\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 12, 'n_estimators': 198, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:58,878]\u001b[0m Trial 47 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 16, 'n_estimators': 358, 'min_samples_leaf': 6}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.840474\n",
            "Model precision-score: 0.843448\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 16, 'n_estimators': 358, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:12:59,557]\u001b[0m Trial 48 finished with value: 0.8125 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 21, 'n_estimators': 314, 'min_samples_leaf': 2}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.811826\n",
            "Model precision-score: 0.814021\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 21, 'n_estimators': 314, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:00,354]\u001b[0m Trial 49 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 159, 'min_samples_leaf': 3}. Best is trial 15 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.858048\n",
            "Model precision-score: 0.860285\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 2, 'n_estimators': 159, 'min_samples_leaf': 3}\n",
            "Number of finished trials: 50\n",
            "Best trial:\n",
            "  Value: 0.8666666666666667\n",
            "  Params: \n",
            "    random_state: 100\n",
            "    min_samples_split: 2\n",
            "    max_depth: 11\n",
            "    n_estimators: 140\n",
            "    min_samples_leaf: 3\n"
          ]
        }
      ],
      "source": [
        "from scipy.sparse.construct import random\n",
        "import optuna\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "def objective(trial):\n",
        "    param = {\n",
        "        'random_state': trial.suggest_categorical('random_state', [0, 25, 100, None]),\n",
        "       \" criterion\" : trial.suggest_int('min_samples_split', 1,10),\n",
        "        \"max_depth\" : trial.suggest_int(\"max_depth\", 2, 32, log=True),\n",
        "        \"n_estimators\" : trial.suggest_int(\"n_estimators\", 100,1000),\n",
        "        \"min_samples_leaf\" : trial.suggest_int(\"min_samples_leaf\", 1,10)\n",
        "        # 'bootstrap': trial.suggest_categorical('bootstrap' , [True, False]),\n",
        "        # 'max_features': trial.suggest_categorical(\"max_features\" , ['auto', 'sqrt'])\n",
        "    }\n",
        "\n",
        "    suggested_random_state = param['random_state']  # also use the suggested random state value in train_test_split()\n",
        "    \n",
        "    \n",
        "    X_train, X_test, y_train, y_test = train_test_split(X_knn, y, test_size=0.2, shuffle=True, random_state=suggested_random_state)\n",
        "    clf = RandomForestClassifier()\n",
        "\n",
        "    clf.fit(X_train, y_train)\n",
        "    y_pred = clf.predict(X_test)\n",
        "    acc = accuracy_score(y_pred, y_test)\n",
        "    f1 = f1_score(y_pred, y_test, average='weighted') # Calculate F1-score #average='weighted'\n",
        "    precision = precision_score(y_pred, y_test, average='weighted') # Calculate Precision\n",
        "    recall = recall_score(y_pred, y_test, average='weighted')  # Calculate Recall\n",
        "    print(f\"Model Accuracy: {round(acc, 6)}\")\n",
        "    print(f\"Model F1-score: {round(f1, 6)}\")\n",
        "    print(f\"Model precision-score: {round(precision, 6)}\")\n",
        "    print(f\"Model recall-score: {round(recall, 6)}\")\n",
        "\n",
        "    print(f\"Model Parameters: {param}\")\n",
        "    \n",
        "    return acc  # return our objective value\n",
        "    return f1\n",
        "    return precision\n",
        "    return recall\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    study = optuna.create_study(\n",
        "        direction=\"maximize\",\n",
        "        sampler=optuna.samplers.TPESampler()\n",
        "    )\n",
        "    study.optimize(objective, n_trials=50)\n",
        "\n",
        "    print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
        "\n",
        "    print(\"Best trial:\")\n",
        "    trial = study.best_trial\n",
        "\n",
        "    print(\"  Value: {}\".format(trial.value))\n",
        "\n",
        "    print(\"  Params: \")\n",
        "    for key, value in trial.params.items():\n",
        "        print(\"    {}: {}\".format(key, value))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bY1N2_HKpQ40"
      },
      "source": [
        "### ROC-AUC of hypertuned RFC"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 122,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gHrRfj1vkGiZ",
        "outputId": "96e0d873-917d-4419-8e1c-c295cc51cfd8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "AUC of RFC: 0.9556\n",
            "10-fold CV mean of RFC: 0.9312\n",
            "10-fold CV std of RFC: 0.0364\n"
          ]
        }
      ],
      "source": [
        "knn_pipeline2= Pipeline(steps=[('imputer', KNNImputer(n_neighbors=3)),('RobustScaler', RobustScaler())])\n",
        "X_knn2 =knn_pipeline2.fit_transform(X_full)\n",
        "X_train_fullkn, X_test_fullkn, y_train_fullkn, y_test_fullkn = train_test_split(X_knn2, y_full, test_size=0.20, random_state=42)\n",
        "\n",
        "## Hyper-tuned RFC\n",
        "model_rf_hyperkn = RandomForestClassifier( ).fit(X_train_fullkn, y_train_fullkn)\n",
        "probs_rf_hyper = model_rf_hyperkn .predict_proba(X_test_fullkn)#[:, 1]\n",
        "\n",
        "auc_rf = roc_auc_score(y_test_fullkn, probs_rf_hyper, multi_class='ovr')\n",
        "print('AUC of RFC: {:.4f}'.format(auc_rf))\n",
        "\n",
        "\n",
        "RFC_KNN = RandomForestClassifier() #random_state= 100, min_samples_split= 6,\n",
        "    \n",
        "scores_cv_rf= cross_val_score(RFC_KNN, X_knn2, y_full, cv=10, scoring='roc_auc_ovr').mean()\n",
        "scores_cv_rf_std= cross_val_score(RFC_KNN, X_knn2, y_full, cv=10, scoring='roc_auc_ovr').std()\n",
        "#print('10-fold CV of RFC', (scores_cv_rf))\n",
        "print('10-fold CV mean of RFC: {:.4f}'.format(scores_cv_rf))\n",
        "print('10-fold CV std of RFC: {:.4f}'.format(scores_cv_rf_std))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "yjk_MgxRmkkB"
      },
      "source": [
        "### Experiment: 2 = Simple Imputer with Strategy Median "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 124,
      "metadata": {
        "id": "X7iqoWQUmkkB"
      },
      "outputs": [],
      "source": [
        "num_features = X.select_dtypes(exclude=\"object\").columns\n",
        "\n",
        "# Fit the Simple imputer with strategy median\n",
        "median_pipeline = Pipeline(steps=[\n",
        "    ('imputer', SimpleImputer(strategy='median')),\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 125,
      "metadata": {
        "id": "M7q92I-WmkkB"
      },
      "outputs": [],
      "source": [
        "# Fit X with median_pipeline\n",
        "X_median = median_pipeline.fit_transform(X)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 126,
      "metadata": {
        "id": "NfbuhaBSmkkC",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1bfa9fc2-0caa-4065-f475-703e2f7c6f35"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Random Forest\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8083\n",
            "- F1 score: 0.8086\n",
            "- Precision: 0.8180\n",
            "- Recall: 0.8083\n",
            "===================================\n",
            "\n",
            "\n",
            "Decision Tree\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7375\n",
            "- F1 score: 0.7384\n",
            "- Precision: 0.7424\n",
            "- Recall: 0.7375\n",
            "===================================\n",
            "\n",
            "\n",
            "K-Neighbors Classifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.7937\n",
            "- F1 score: 0.7927\n",
            "- Precision: 0.7933\n",
            "- Recall: 0.7937\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7042\n",
            "- F1 score: 0.7043\n",
            "- Precision: 0.7118\n",
            "- Recall: 0.7042\n",
            "===================================\n",
            "\n",
            "\n",
            "XGBClassifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.9458\n",
            "- F1 score: 0.9458\n",
            "- Precision: 0.9458\n",
            "- Recall: 0.9458\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8129\n",
            "- Precision: 0.8195\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "SVM\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.8031\n",
            "- F1 score: 0.8025\n",
            "- Precision: 0.8082\n",
            "- Recall: 0.8031\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7292\n",
            "- F1 score: 0.7286\n",
            "- Precision: 0.7373\n",
            "- Recall: 0.7292\n",
            "===================================\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Training the models\n",
        "report_median = evaluate_models(X_median, y, models)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "G5P02QpOTtJ-"
      },
      "source": [
        "Optuna for hyper-parameter tuning of RFC model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 128,
      "metadata": {
        "id": "pbwRwm9Ta-6j",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b46405ac-76ef-4038-821f-4844e32d907f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:14,805]\u001b[0m A new study created in memory with name: no-name-ade4d966-9f15-4bf2-b6a3-06dd600ada06\u001b[0m\n",
            "\u001b[32m[I 2023-02-19 13:13:15,122]\u001b[0m Trial 0 finished with value: 0.8041666666666667 and parameters: {'random_state': 0, 'min_samples_split': 7, 'max_depth': 2, 'n_estimators': 252, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.8041666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.8031\n",
            "Model precision-score: 0.812018\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 0, ' criterion': 7, 'max_depth': 2, 'n_estimators': 252, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:15,438]\u001b[0m Trial 1 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 12, 'n_estimators': 838, 'min_samples_leaf': 7}. Best is trial 1 with value: 0.8416666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.840358\n",
            "Model precision-score: 0.846985\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 12, 'n_estimators': 838, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:15,744]\u001b[0m Trial 2 finished with value: 0.8083333333333333 and parameters: {'random_state': 0, 'min_samples_split': 2, 'max_depth': 22, 'n_estimators': 995, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8416666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.808333\n",
            "Model F1-score: 0.808617\n",
            "Model precision-score: 0.81601\n",
            "Model recall-score: 0.808333\n",
            "Model Parameters: {'random_state': 0, ' criterion': 2, 'max_depth': 22, 'n_estimators': 995, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:16,065]\u001b[0m Trial 3 finished with value: 0.8166666666666667 and parameters: {'random_state': 0, 'min_samples_split': 7, 'max_depth': 2, 'n_estimators': 670, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.8416666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.816667\n",
            "Model F1-score: 0.81619\n",
            "Model precision-score: 0.822782\n",
            "Model recall-score: 0.816667\n",
            "Model Parameters: {'random_state': 0, ' criterion': 7, 'max_depth': 2, 'n_estimators': 670, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:16,375]\u001b[0m Trial 4 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 25, 'n_estimators': 892, 'min_samples_leaf': 2}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865338\n",
            "Model precision-score: 0.867841\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 25, 'n_estimators': 892, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:16,696]\u001b[0m Trial 5 finished with value: 0.8291666666666667 and parameters: {'random_state': None, 'min_samples_split': 9, 'max_depth': 25, 'n_estimators': 994, 'min_samples_leaf': 1}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.829937\n",
            "Model precision-score: 0.836781\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': None, ' criterion': 9, 'max_depth': 25, 'n_estimators': 994, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:17,012]\u001b[0m Trial 6 finished with value: 0.8041666666666667 and parameters: {'random_state': 0, 'min_samples_split': 1, 'max_depth': 4, 'n_estimators': 824, 'min_samples_leaf': 10}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.804013\n",
            "Model precision-score: 0.810027\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 0, ' criterion': 1, 'max_depth': 4, 'n_estimators': 824, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:17,320]\u001b[0m Trial 7 finished with value: 0.8291666666666667 and parameters: {'random_state': 0, 'min_samples_split': 4, 'max_depth': 13, 'n_estimators': 755, 'min_samples_leaf': 9}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.828514\n",
            "Model precision-score: 0.836348\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': 0, ' criterion': 4, 'max_depth': 13, 'n_estimators': 755, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:17,647]\u001b[0m Trial 8 finished with value: 0.8125 and parameters: {'random_state': None, 'min_samples_split': 9, 'max_depth': 8, 'n_estimators': 235, 'min_samples_leaf': 5}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812044\n",
            "Model precision-score: 0.817915\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': None, ' criterion': 9, 'max_depth': 8, 'n_estimators': 235, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:17,958]\u001b[0m Trial 9 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 13, 'n_estimators': 384, 'min_samples_leaf': 8}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852501\n",
            "Model precision-score: 0.855635\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 13, 'n_estimators': 384, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:18,284]\u001b[0m Trial 10 finished with value: 0.8041666666666667 and parameters: {'random_state': 25, 'min_samples_split': 5, 'max_depth': 31, 'n_estimators': 532, 'min_samples_leaf': 4}. Best is trial 4 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.803563\n",
            "Model precision-score: 0.809456\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 25, ' criterion': 5, 'max_depth': 31, 'n_estimators': 532, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:18,616]\u001b[0m Trial 11 finished with value: 0.8791666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 16, 'n_estimators': 445, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.879167\n",
            "Model F1-score: 0.877775\n",
            "Model precision-score: 0.881971\n",
            "Model recall-score: 0.879167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 16, 'n_estimators': 445, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:18,980]\u001b[0m Trial 12 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 20, 'n_estimators': 477, 'min_samples_leaf': 4}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852901\n",
            "Model precision-score: 0.855729\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 20, 'n_estimators': 477, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:19,497]\u001b[0m Trial 13 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 31, 'n_estimators': 384, 'min_samples_leaf': 7}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.86173\n",
            "Model precision-score: 0.864024\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 31, 'n_estimators': 384, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:20,002]\u001b[0m Trial 14 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 17, 'n_estimators': 114, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.844915\n",
            "Model precision-score: 0.846748\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 17, 'n_estimators': 114, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:20,526]\u001b[0m Trial 15 finished with value: 0.8041666666666667 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 8, 'n_estimators': 695, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.80298\n",
            "Model precision-score: 0.808493\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 8, 'n_estimators': 695, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:21,056]\u001b[0m Trial 16 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 17, 'n_estimators': 579, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848601\n",
            "Model precision-score: 0.85217\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 17, 'n_estimators': 579, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:21,565]\u001b[0m Trial 17 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 24, 'n_estimators': 412, 'min_samples_leaf': 6}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861792\n",
            "Model precision-score: 0.863091\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 24, 'n_estimators': 412, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:22,085]\u001b[0m Trial 18 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 31, 'n_estimators': 607, 'min_samples_leaf': 1}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853446\n",
            "Model precision-score: 0.854346\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 31, 'n_estimators': 607, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:22,481]\u001b[0m Trial 19 finished with value: 0.8 and parameters: {'random_state': 25, 'min_samples_split': 10, 'max_depth': 10, 'n_estimators': 854, 'min_samples_leaf': 5}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.798923\n",
            "Model precision-score: 0.804084\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 25, ' criterion': 10, 'max_depth': 10, 'n_estimators': 854, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:22,810]\u001b[0m Trial 20 finished with value: 0.8583333333333333 and parameters: {'random_state': None, 'min_samples_split': 7, 'max_depth': 17, 'n_estimators': 251, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.858749\n",
            "Model precision-score: 0.860993\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': None, ' criterion': 7, 'max_depth': 17, 'n_estimators': 251, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:23,137]\u001b[0m Trial 21 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 31, 'n_estimators': 379, 'min_samples_leaf': 7}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861433\n",
            "Model precision-score: 0.865677\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 31, 'n_estimators': 379, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:23,470]\u001b[0m Trial 22 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 24, 'n_estimators': 484, 'min_samples_leaf': 7}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.836474\n",
            "Model precision-score: 0.841633\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 24, 'n_estimators': 484, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:23,800]\u001b[0m Trial 23 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 32, 'n_estimators': 337, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.835759\n",
            "Model precision-score: 0.839765\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 32, 'n_estimators': 337, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:24,130]\u001b[0m Trial 24 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 18, 'n_estimators': 470, 'min_samples_leaf': 6}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857294\n",
            "Model precision-score: 0.860324\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 18, 'n_estimators': 470, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:24,454]\u001b[0m Trial 25 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 23, 'n_estimators': 322, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.869613\n",
            "Model precision-score: 0.871829\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 23, 'n_estimators': 322, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:24,793]\u001b[0m Trial 26 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 14, 'n_estimators': 142, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.84445\n",
            "Model precision-score: 0.851043\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 14, 'n_estimators': 142, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:25,135]\u001b[0m Trial 27 finished with value: 0.875 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 20, 'n_estimators': 626, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.875\n",
            "Model F1-score: 0.874386\n",
            "Model precision-score: 0.876935\n",
            "Model recall-score: 0.875\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 20, 'n_estimators': 626, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:25,458]\u001b[0m Trial 28 finished with value: 0.8458333333333333 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 15, 'n_estimators': 295, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.846108\n",
            "Model precision-score: 0.855535\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 15, 'n_estimators': 295, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:25,792]\u001b[0m Trial 29 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 2, 'max_depth': 11, 'n_estimators': 200, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.782583\n",
            "Model precision-score: 0.78896\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 2, 'max_depth': 11, 'n_estimators': 200, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:26,122]\u001b[0m Trial 30 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 20, 'n_estimators': 621, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.835593\n",
            "Model precision-score: 0.837944\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 20, 'n_estimators': 621, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:26,453]\u001b[0m Trial 31 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 21, 'n_estimators': 912, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857558\n",
            "Model precision-score: 0.861137\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 21, 'n_estimators': 912, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:26,790]\u001b[0m Trial 32 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 16, 'n_estimators': 316, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861543\n",
            "Model precision-score: 0.865965\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 16, 'n_estimators': 316, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:27,125]\u001b[0m Trial 33 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 25, 'n_estimators': 528, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848447\n",
            "Model precision-score: 0.851709\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 25, 'n_estimators': 528, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:27,451]\u001b[0m Trial 34 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 20, 'n_estimators': 754, 'min_samples_leaf': 2}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861643\n",
            "Model precision-score: 0.865582\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 20, 'n_estimators': 754, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:27,795]\u001b[0m Trial 35 finished with value: 0.7916666666666666 and parameters: {'random_state': 0, 'min_samples_split': 8, 'max_depth': 11, 'n_estimators': 921, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.790712\n",
            "Model precision-score: 0.798492\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 0, ' criterion': 8, 'max_depth': 11, 'n_estimators': 921, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:28,124]\u001b[0m Trial 36 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 26, 'n_estimators': 676, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856649\n",
            "Model precision-score: 0.861843\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 26, 'n_estimators': 676, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:28,455]\u001b[0m Trial 37 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 5, 'max_depth': 15, 'n_estimators': 440, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812256\n",
            "Model precision-score: 0.815919\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 5, 'max_depth': 15, 'n_estimators': 440, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:28,788]\u001b[0m Trial 38 finished with value: 0.8541666666666666 and parameters: {'random_state': None, 'min_samples_split': 1, 'max_depth': 22, 'n_estimators': 744, 'min_samples_leaf': 10}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.854963\n",
            "Model precision-score: 0.857165\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': None, ' criterion': 1, 'max_depth': 22, 'n_estimators': 744, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:29,126]\u001b[0m Trial 39 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 27, 'n_estimators': 950, 'min_samples_leaf': 1}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848768\n",
            "Model precision-score: 0.851867\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 27, 'n_estimators': 950, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:29,456]\u001b[0m Trial 40 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 19, 'n_estimators': 860, 'min_samples_leaf': 3}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.844262\n",
            "Model precision-score: 0.849177\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 19, 'n_estimators': 860, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:29,804]\u001b[0m Trial 41 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 27, 'n_estimators': 357, 'min_samples_leaf': 7}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857042\n",
            "Model precision-score: 0.860485\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 27, 'n_estimators': 357, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:30,146]\u001b[0m Trial 42 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 22, 'n_estimators': 286, 'min_samples_leaf': 6}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.844932\n",
            "Model precision-score: 0.849166\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 22, 'n_estimators': 286, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:30,479]\u001b[0m Trial 43 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 27, 'n_estimators': 421, 'min_samples_leaf': 7}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857706\n",
            "Model precision-score: 0.858933\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 27, 'n_estimators': 421, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:30,812]\u001b[0m Trial 44 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 6, 'max_depth': 13, 'n_estimators': 799, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820746\n",
            "Model precision-score: 0.826654\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 6, 'max_depth': 13, 'n_estimators': 799, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:31,143]\u001b[0m Trial 45 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 23, 'n_estimators': 184, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856835\n",
            "Model precision-score: 0.861504\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 23, 'n_estimators': 184, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:31,481]\u001b[0m Trial 46 finished with value: 0.7958333333333333 and parameters: {'random_state': 25, 'min_samples_split': 5, 'max_depth': 19, 'n_estimators': 531, 'min_samples_leaf': 8}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.795833\n",
            "Model F1-score: 0.795121\n",
            "Model precision-score: 0.800222\n",
            "Model recall-score: 0.795833\n",
            "Model Parameters: {'random_state': 25, ' criterion': 5, 'max_depth': 19, 'n_estimators': 531, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:31,808]\u001b[0m Trial 47 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 29, 'n_estimators': 387, 'min_samples_leaf': 4}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848993\n",
            "Model precision-score: 0.8507\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 29, 'n_estimators': 387, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:32,154]\u001b[0m Trial 48 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 32, 'n_estimators': 447, 'min_samples_leaf': 6}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.849206\n",
            "Model precision-score: 0.853774\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 32, 'n_estimators': 447, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:32,604]\u001b[0m Trial 49 finished with value: 0.7958333333333333 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 28, 'n_estimators': 573, 'min_samples_leaf': 9}. Best is trial 11 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.795833\n",
            "Model F1-score: 0.796533\n",
            "Model precision-score: 0.803344\n",
            "Model recall-score: 0.795833\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 28, 'n_estimators': 573, 'min_samples_leaf': 9}\n",
            "Number of finished trials: 50\n",
            "Best trial:\n",
            "  Value: 0.8791666666666667\n",
            "  Params: \n",
            "    random_state: 100\n",
            "    min_samples_split: 3\n",
            "    max_depth: 16\n",
            "    n_estimators: 445\n",
            "    min_samples_leaf: 8\n"
          ]
        }
      ],
      "source": [
        "from scipy.sparse.construct import random\n",
        "import optuna\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: train a random forest with the sampled hyperparameters.\n",
        "\n",
        "    Samples the search space from `trial`, splits (X_median, y) 80/20, fits a\n",
        "    RandomForestClassifier built from the sampled values and returns the\n",
        "    test-set accuracy, which is the value the study optimizes. Weighted\n",
        "    F1, precision and recall are printed for inspection only.\n",
        "    \"\"\"\n",
        "    param = {\n",
        "        'random_state': trial.suggest_categorical('random_state', [0, 25, 100, None]),\n",
        "        # Keyed by the real estimator argument name so the dict can be unpacked\n",
        "        # into the classifier. sklearn rejects an integer min_samples_split\n",
        "        # below 2, so the range starts at 2.\n",
        "        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),\n",
        "        'max_depth': trial.suggest_int('max_depth', 2, 32, log=True),\n",
        "        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),\n",
        "        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),\n",
        "        # 'bootstrap': trial.suggest_categorical('bootstrap' , [True, False]),\n",
        "        # 'max_features': trial.suggest_categorical('max_features' , ['auto', 'sqrt'])\n",
        "    }\n",
        "\n",
        "    # The sampled seed drives both the train/test split and the forest itself.\n",
        "    # NOTE(review): because the split seed is part of the search space, the best\n",
        "    # trial partly reflects a favourable split rather than better\n",
        "    # hyperparameters; consider fixing the split seed.\n",
        "    X_train, X_test, y_train, y_test = train_test_split(\n",
        "        X_median, y, test_size=0.2, shuffle=True, random_state=param['random_state']\n",
        "    )\n",
        "\n",
        "    # Hand the sampled values to the model: a bare RandomForestClassifier()\n",
        "    # trains the default forest on every trial and ignores the search space.\n",
        "    clf = RandomForestClassifier(**param)\n",
        "    clf.fit(X_train, y_train)\n",
        "    y_pred = clf.predict(X_test)\n",
        "\n",
        "    # sklearn metrics take (y_true, y_pred) in that order; swapping them\n",
        "    # exchanges precision and recall and changes the 'weighted' class weights.\n",
        "    acc = accuracy_score(y_test, y_pred)\n",
        "    f1 = f1_score(y_test, y_pred, average='weighted')\n",
        "    precision = precision_score(y_test, y_pred, average='weighted')\n",
        "    recall = recall_score(y_test, y_pred, average='weighted')\n",
        "\n",
        "    print(f\"Model Accuracy: {round(acc, 6)}\")\n",
        "    print(f\"Model F1-score: {round(f1, 6)}\")\n",
        "    print(f\"Model precision-score: {round(precision, 6)}\")\n",
        "    print(f\"Model recall-score: {round(recall, 6)}\")\n",
        "\n",
        "    print(f\"Model Parameters: {param}\")\n",
        "\n",
        "    # Only one value can be returned to a single-objective study.\n",
        "    return acc\n",
        "\n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    # Single-objective study: a TPE sampler searches for the parameters that\n",
        "    # maximize the value returned by objective(), over 50 trials.\n",
        "    study = optuna.create_study(sampler=optuna.samplers.TPESampler(), direction=\"maximize\")\n",
        "    study.optimize(objective, n_trials=50)\n",
        "\n",
        "    # Summary: number of trials, then the best trial's value and parameters.\n",
        "    print(f\"Number of finished trials: {len(study.trials)}\")\n",
        "    print(\"Best trial:\")\n",
        "    trial = study.best_trial\n",
        "    print(f\"  Value: {trial.value}\")\n",
        "    print(\"  Params: \")\n",
        "    for key, value in trial.params.items():\n",
        "        print(f\"    {key}: {value}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TwQobLuiHoDs"
      },
      "source": [
        "ROC-AUC score for the hyper-tuned RFC"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 129,
      "metadata": {
        "id": "aUlYERIGCz2P",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "f5f57965-229c-48c9-bf57-29d01ed1f95e"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "- AUC of RFC: 0.9593\n",
            "10-fold CV mean of RFC: 0.9064\n",
            "10-fold CV std of RFC: 0.0429\n"
          ]
        }
      ],
      "source": [
        "# Median imputation followed by robust scaling of the full feature matrix.\n",
        "# NOTE(review): the pipeline is fitted on every row before the split and the\n",
        "# cross-validation below, so held-out rows contribute to the imputer and scaler\n",
        "# statistics; consider fitting it inside each fold instead.\n",
        "median_pipeline12 = Pipeline(steps=[\n",
        "    ('imputer', SimpleImputer(strategy='median')),\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])\n",
        "\n",
        "X_median12 = median_pipeline12.fit_transform(X_full)\n",
        "X_train_full_m, X_test_full_m, y_train_full_m, y_test_full_m = train_test_split(\n",
        "    X_median12, y_full, test_size=0.20, random_state=42\n",
        ")\n",
        "\n",
        "## Hyper-tuned RFC\n",
        "# Hold-out estimate: one-vs-rest ROC-AUC on the 20% test split.\n",
        "# NOTE(review): this model and RFC_mc below use different hyperparameters, and\n",
        "# neither set matches the best trial in the stored Optuna output; confirm which\n",
        "# values are intended.\n",
        "model_rf_hyper2 = RandomForestClassifier(\n",
        "    random_state=100,\n",
        "    min_samples_split=5,\n",
        "    max_depth=11,\n",
        "    n_estimators=349,\n",
        "    min_samples_leaf=1,\n",
        ").fit(X_train_full_m, y_train_full_m)\n",
        "probs_rf_hyper_mc = model_rf_hyper2.predict_proba(X_test_full_m)\n",
        "auc_rf_mc = roc_auc_score(y_test_full_m, probs_rf_hyper_mc, multi_class='ovr')\n",
        "print('- AUC of RFC: {:.4f}'.format(auc_rf_mc))\n",
        "\n",
        "\n",
        "RFC_mc = RandomForestClassifier(\n",
        "    random_state=100,\n",
        "    min_samples_split=9,\n",
        "    max_depth=16,\n",
        "    n_estimators=221,\n",
        "    min_samples_leaf=8,\n",
        ")\n",
        "\n",
        "# Run the 10-fold cross-validation once and derive both statistics from the\n",
        "# same fold scores instead of repeating the whole CV for the std.\n",
        "scores_cv_rf_folds = cross_val_score(RFC_mc, X_median12, y_full, cv=10, scoring='roc_auc_ovr')\n",
        "scores_cv_rf = scores_cv_rf_folds.mean()\n",
        "scores_cv_rf_std = scores_cv_rf_folds.std()\n",
        "print('10-fold CV mean of RFC: {:.4f}'.format(scores_cv_rf))\n",
        "print('10-fold CV std of RFC: {:.4f}'.format(scores_cv_rf_std))\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VgbT9-l4mkkC"
      },
      "source": [
        "### Experiment 3: MICE for imputing null values"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "In5Oel1cneam"
      },
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": 131,
      "metadata": {
        "id": "s6jMOCdYneqc",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d385d20f-e2c1-48ef-ad25-d83d4c16068d"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: miceforest in /usr/local/lib/python3.8/dist-packages (5.6.3)\n",
            "Requirement already satisfied: blosc in /usr/local/lib/python3.8/dist-packages (from miceforest) (1.11.1)\n",
            "Requirement already satisfied: lightgbm>=3.3.1 in /usr/local/lib/python3.8/dist-packages (from miceforest) (3.3.5)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from miceforest) (1.21.6)\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from miceforest) (0.3.6)\n",
            "Requirement already satisfied: wheel in /usr/local/lib/python3.8/dist-packages (from lightgbm>=3.3.1->miceforest) (0.38.4)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from lightgbm>=3.3.1->miceforest) (1.7.3)\n",
            "Requirement already satisfied: scikit-learn!=0.22.0 in /usr/local/lib/python3.8/dist-packages (from lightgbm>=3.3.1->miceforest) (1.0.2)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn!=0.22.0->lightgbm>=3.3.1->miceforest) (3.1.0)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn!=0.22.0->lightgbm>=3.3.1->miceforest) (1.2.0)\n"
          ]
        }
      ],
      "source": [
        "# Install into the running kernel's environment, pinned to the release this\n",
        "# notebook was executed with (5.6.3 in the stored cell output).\n",
        "# NOTE(review): the ImputationKernel call below uses this release's keyword\n",
        "# names; confirm they still exist before unpinning.\n",
        "%pip install miceforest==5.6.3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 132,
      "metadata": {
        "id": "9Ci2VDdgmkkC"
      },
      "outputs": [],
      "source": [
        "import miceforest as mf\n",
        "\n",
        "# Build the imputation kernel from a copy of the feature frame X.\n",
        "X_mice = X.copy()\n",
        "kernel = mf.ImputationKernel(\n",
        "  X_mice,\n",
        "  save_all_iterations=True,\n",
        "  random_state=1989\n",
        ")\n",
        "\n",
        "# Run the MICE algorithm for 3 iterations. This must be its own statement:\n",
        "# without it the kernel is never iterated before complete_data() is read.\n",
        "kernel.mice(3)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 133,
      "metadata": {
        "id": "Zs81EaTXmkkC"
      },
      "outputs": [],
      "source": [
        "X_mice = kernel.complete_data()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 134,
      "metadata": {
        "id": "0aKXmwOumkkC"
      },
      "outputs": [],
      "source": [
        "# fit robust scaler\n",
        "mice_pipeline = Pipeline(steps=[\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 135,
      "metadata": {
        "id": "u9pKQrPmmkkC"
      },
      "outputs": [],
      "source": [
        "# Fit X with Mice imputer \n",
        "X_mice= mice_pipeline.fit_transform(X_mice)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 136,
      "metadata": {
        "id": "d1ytZXJRmkkD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "6bd30f69-574c-43f3-ad39-0ae95bd6d31a"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Random Forest\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8135\n",
            "- Precision: 0.8234\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "Decision Tree\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7458\n",
            "- F1 score: 0.7464\n",
            "- Precision: 0.7512\n",
            "- Recall: 0.7458\n",
            "===================================\n",
            "\n",
            "\n",
            "K-Neighbors Classifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.7937\n",
            "- F1 score: 0.7927\n",
            "- Precision: 0.7933\n",
            "- Recall: 0.7937\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7042\n",
            "- F1 score: 0.7043\n",
            "- Precision: 0.7118\n",
            "- Recall: 0.7042\n",
            "===================================\n",
            "\n",
            "\n",
            "XGBClassifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.9458\n",
            "- F1 score: 0.9458\n",
            "- Precision: 0.9458\n",
            "- Recall: 0.9458\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8129\n",
            "- Precision: 0.8195\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "SVM\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.8031\n",
            "- F1 score: 0.8025\n",
            "- Precision: 0.8082\n",
            "- Recall: 0.8031\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7292\n",
            "- F1 score: 0.7286\n",
            "- Precision: 0.7373\n",
            "- Recall: 0.7292\n",
            "===================================\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Training the models\n",
        "report_mice = evaluate_models(X_mice, y, models)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "g4ci9G3y9VIN"
      },
      "source": [
        "#### Hyper-parameter tuning of RFC model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 139,
      "metadata": {
        "id": "NNYuPcxWhh0K",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "aa06f553-e8f7-4d74-c352-0d3f8e42e4d7"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:53,428]\u001b[0m A new study created in memory with name: no-name-598ffc68-c13c-448c-b9b7-43afeffbbe17\u001b[0m\n",
            "\u001b[32m[I 2023-02-19 13:13:53,746]\u001b[0m Trial 0 finished with value: 0.8458333333333333 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 3, 'n_estimators': 337, 'min_samples_leaf': 10}. Best is trial 0 with value: 0.8458333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.845504\n",
            "Model precision-score: 0.848845\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 3, 'n_estimators': 337, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:54,045]\u001b[0m Trial 1 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 16, 'n_estimators': 454, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.8662\n",
            "Model precision-score: 0.870513\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 16, 'n_estimators': 454, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:54,373]\u001b[0m Trial 2 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 8, 'n_estimators': 768, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.791405\n",
            "Model precision-score: 0.795477\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 8, 'n_estimators': 768, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:54,684]\u001b[0m Trial 3 finished with value: 0.8 and parameters: {'random_state': None, 'min_samples_split': 8, 'max_depth': 3, 'n_estimators': 996, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799768\n",
            "Model precision-score: 0.802259\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': None, ' criterion': 8, 'max_depth': 3, 'n_estimators': 996, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:55,110]\u001b[0m Trial 4 finished with value: 0.7791666666666667 and parameters: {'random_state': 25, 'min_samples_split': 2, 'max_depth': 10, 'n_estimators': 424, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.779167\n",
            "Model F1-score: 0.778353\n",
            "Model precision-score: 0.786144\n",
            "Model recall-score: 0.779167\n",
            "Model Parameters: {'random_state': 25, ' criterion': 2, 'max_depth': 10, 'n_estimators': 424, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:55,418]\u001b[0m Trial 5 finished with value: 0.8083333333333333 and parameters: {'random_state': 0, 'min_samples_split': 8, 'max_depth': 22, 'n_estimators': 290, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.808333\n",
            "Model F1-score: 0.807999\n",
            "Model precision-score: 0.812094\n",
            "Model recall-score: 0.808333\n",
            "Model Parameters: {'random_state': 0, ' criterion': 8, 'max_depth': 22, 'n_estimators': 290, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:55,727]\u001b[0m Trial 6 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 17, 'n_estimators': 978, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.782914\n",
            "Model precision-score: 0.786816\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 17, 'n_estimators': 978, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:56,039]\u001b[0m Trial 7 finished with value: 0.7958333333333333 and parameters: {'random_state': 25, 'min_samples_split': 7, 'max_depth': 9, 'n_estimators': 791, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.795833\n",
            "Model F1-score: 0.795604\n",
            "Model precision-score: 0.799718\n",
            "Model recall-score: 0.795833\n",
            "Model Parameters: {'random_state': 25, ' criterion': 7, 'max_depth': 9, 'n_estimators': 791, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:56,355]\u001b[0m Trial 8 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 1, 'max_depth': 6, 'n_estimators': 749, 'min_samples_leaf': 8}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.790566\n",
            "Model precision-score: 0.796216\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 1, 'max_depth': 6, 'n_estimators': 749, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:56,676]\u001b[0m Trial 9 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 3, 'n_estimators': 355, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857304\n",
            "Model precision-score: 0.86222\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 3, 'n_estimators': 355, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:57,005]\u001b[0m Trial 10 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 31, 'n_estimators': 110, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861768\n",
            "Model precision-score: 0.864715\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 31, 'n_estimators': 110, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:57,333]\u001b[0m Trial 11 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 32, 'n_estimators': 187, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.860731\n",
            "Model precision-score: 0.865068\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 32, 'n_estimators': 187, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:57,671]\u001b[0m Trial 12 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 32, 'n_estimators': 106, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.847726\n",
            "Model precision-score: 0.854915\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 32, 'n_estimators': 106, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:57,998]\u001b[0m Trial 13 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 17, 'n_estimators': 556, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857204\n",
            "Model precision-score: 0.862117\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 17, 'n_estimators': 556, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:58,427]\u001b[0m Trial 14 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 15, 'n_estimators': 542, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861878\n",
            "Model precision-score: 0.864798\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 15, 'n_estimators': 542, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:58,797]\u001b[0m Trial 15 finished with value: 0.8166666666666667 and parameters: {'random_state': 0, 'min_samples_split': 4, 'max_depth': 23, 'n_estimators': 529, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.816667\n",
            "Model F1-score: 0.815927\n",
            "Model precision-score: 0.82311\n",
            "Model recall-score: 0.816667\n",
            "Model Parameters: {'random_state': 0, ' criterion': 4, 'max_depth': 23, 'n_estimators': 529, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:59,201]\u001b[0m Trial 16 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 12, 'n_estimators': 223, 'min_samples_leaf': 5}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852979\n",
            "Model precision-score: 0.861348\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 12, 'n_estimators': 223, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:13:59,719]\u001b[0m Trial 17 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 6, 'n_estimators': 655, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853103\n",
            "Model precision-score: 0.855201\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 6, 'n_estimators': 655, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:00,217]\u001b[0m Trial 18 finished with value: 0.8333333333333334 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 23, 'n_estimators': 124, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.833333\n",
            "Model F1-score: 0.832094\n",
            "Model precision-score: 0.834411\n",
            "Model recall-score: 0.833333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 23, 'n_estimators': 124, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:00,733]\u001b[0m Trial 19 finished with value: 0.8291666666666667 and parameters: {'random_state': 0, 'min_samples_split': 7, 'max_depth': 12, 'n_estimators': 413, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.828868\n",
            "Model precision-score: 0.836019\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': 0, ' criterion': 7, 'max_depth': 12, 'n_estimators': 413, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:01,245]\u001b[0m Trial 20 finished with value: 0.8291666666666667 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 32, 'n_estimators': 266, 'min_samples_leaf': 7}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.829167\n",
            "Model F1-score: 0.829531\n",
            "Model precision-score: 0.832716\n",
            "Model recall-score: 0.829167\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 32, 'n_estimators': 266, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:01,771]\u001b[0m Trial 21 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 31, 'n_estimators': 191, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861138\n",
            "Model precision-score: 0.864514\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 31, 'n_estimators': 191, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:02,289]\u001b[0m Trial 22 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 24, 'n_estimators': 173, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865605\n",
            "Model precision-score: 0.867549\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 24, 'n_estimators': 173, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:02,663]\u001b[0m Trial 23 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 21, 'n_estimators': 453, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848367\n",
            "Model precision-score: 0.853268\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 21, 'n_estimators': 453, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:03,001]\u001b[0m Trial 24 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 24, 'n_estimators': 150, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853122\n",
            "Model precision-score: 0.856479\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 24, 'n_estimators': 150, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:03,338]\u001b[0m Trial 25 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 16, 'n_estimators': 265, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857303\n",
            "Model precision-score: 0.860104\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 16, 'n_estimators': 265, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:03,911]\u001b[0m Trial 26 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 19, 'n_estimators': 649, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848987\n",
            "Model precision-score: 0.852351\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 19, 'n_estimators': 649, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:04,514]\u001b[0m Trial 27 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 25, 'n_estimators': 335, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.839742\n",
            "Model precision-score: 0.847069\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 25, 'n_estimators': 335, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:05,101]\u001b[0m Trial 28 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 10, 'max_depth': 13, 'n_estimators': 220, 'min_samples_leaf': 5}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820473\n",
            "Model precision-score: 0.825197\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 10, 'max_depth': 13, 'n_estimators': 220, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:05,817]\u001b[0m Trial 29 finished with value: 0.8125 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 27, 'n_estimators': 347, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.814078\n",
            "Model precision-score: 0.824426\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 27, 'n_estimators': 347, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:06,430]\u001b[0m Trial 30 finished with value: 0.8333333333333334 and parameters: {'random_state': None, 'min_samples_split': 8, 'max_depth': 19, 'n_estimators': 877, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.833333\n",
            "Model F1-score: 0.833018\n",
            "Model precision-score: 0.834464\n",
            "Model recall-score: 0.833333\n",
            "Model Parameters: {'random_state': None, ' criterion': 8, 'max_depth': 19, 'n_estimators': 877, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:07,215]\u001b[0m Trial 31 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 27, 'n_estimators': 167, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.86238\n",
            "Model precision-score: 0.864476\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 27, 'n_estimators': 167, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:07,865]\u001b[0m Trial 32 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 28, 'n_estimators': 106, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.86192\n",
            "Model precision-score: 0.862592\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 28, 'n_estimators': 106, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:08,549]\u001b[0m Trial 33 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 20, 'n_estimators': 200, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861449\n",
            "Model precision-score: 0.864883\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 20, 'n_estimators': 200, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:09,148]\u001b[0m Trial 34 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 27, 'n_estimators': 308, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853136\n",
            "Model precision-score: 0.855924\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 27, 'n_estimators': 308, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:09,860]\u001b[0m Trial 35 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 32, 'n_estimators': 250, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.84493\n",
            "Model precision-score: 0.846165\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 32, 'n_estimators': 250, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:10,492]\u001b[0m Trial 36 finished with value: 0.8166666666666667 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 22, 'n_estimators': 405, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.816667\n",
            "Model F1-score: 0.817864\n",
            "Model precision-score: 0.822804\n",
            "Model recall-score: 0.816667\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 22, 'n_estimators': 405, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:11,088]\u001b[0m Trial 37 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 9, 'max_depth': 18, 'n_estimators': 497, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.79204\n",
            "Model precision-score: 0.802315\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 9, 'max_depth': 18, 'n_estimators': 497, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:11,838]\u001b[0m Trial 38 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 26, 'n_estimators': 148, 'min_samples_leaf': 9}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861043\n",
            "Model precision-score: 0.864503\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 26, 'n_estimators': 148, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:12,634]\u001b[0m Trial 39 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 2, 'max_depth': 15, 'n_estimators': 595, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.811917\n",
            "Model precision-score: 0.819295\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 2, 'max_depth': 15, 'n_estimators': 595, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:13,715]\u001b[0m Trial 40 finished with value: 0.7958333333333333 and parameters: {'random_state': 25, 'min_samples_split': 5, 'max_depth': 21, 'n_estimators': 378, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.795833\n",
            "Model F1-score: 0.79535\n",
            "Model precision-score: 0.79831\n",
            "Model recall-score: 0.795833\n",
            "Model Parameters: {'random_state': 25, ' criterion': 5, 'max_depth': 21, 'n_estimators': 378, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:15,067]\u001b[0m Trial 41 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 16, 'n_estimators': 461, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848435\n",
            "Model precision-score: 0.851786\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 16, 'n_estimators': 461, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:16,369]\u001b[0m Trial 42 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 15, 'n_estimators': 703, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.86588\n",
            "Model precision-score: 0.86684\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 15, 'n_estimators': 703, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:16,949]\u001b[0m Trial 43 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 24, 'n_estimators': 715, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861163\n",
            "Model precision-score: 0.863784\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 24, 'n_estimators': 715, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:17,813]\u001b[0m Trial 44 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 29, 'n_estimators': 933, 'min_samples_leaf': 1}. Best is trial 44 with value: 0.8708333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.870333\n",
            "Model precision-score: 0.871024\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 29, 'n_estimators': 933, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:18,541]\u001b[0m Trial 45 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 18, 'n_estimators': 936, 'min_samples_leaf': 3}. Best is trial 44 with value: 0.8708333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848386\n",
            "Model precision-score: 0.85321\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 18, 'n_estimators': 936, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:19,233]\u001b[0m Trial 46 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 10, 'n_estimators': 838, 'min_samples_leaf': 2}. Best is trial 44 with value: 0.8708333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857268\n",
            "Model precision-score: 0.858812\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 10, 'n_estimators': 838, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:19,859]\u001b[0m Trial 47 finished with value: 0.7875 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 21, 'n_estimators': 955, 'min_samples_leaf': 1}. Best is trial 44 with value: 0.8708333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.7875\n",
            "Model F1-score: 0.787681\n",
            "Model precision-score: 0.796455\n",
            "Model recall-score: 0.7875\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 21, 'n_estimators': 955, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:20,820]\u001b[0m Trial 48 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 14, 'n_estimators': 811, 'min_samples_leaf': 1}. Best is trial 44 with value: 0.8708333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.862221\n",
            "Model precision-score: 0.862568\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 14, 'n_estimators': 811, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:14:21,590]\u001b[0m Trial 49 finished with value: 0.875 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 29, 'n_estimators': 910, 'min_samples_leaf': 3}. Best is trial 49 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.875\n",
            "Model F1-score: 0.874284\n",
            "Model precision-score: 0.87732\n",
            "Model recall-score: 0.875\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 29, 'n_estimators': 910, 'min_samples_leaf': 3}\n",
            "Number of finished trials: 50\n",
            "Best trial:\n",
            "  Value: 0.875\n",
            "  Params: \n",
            "    random_state: 100\n",
            "    min_samples_split: 5\n",
            "    max_depth: 29\n",
            "    n_estimators: 910\n",
            "    min_samples_leaf: 3\n"
          ]
        }
      ],
      "source": [
        "import optuna\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "def objective(trial):\n",
        "    param = {\n",
        "        'random_state': trial.suggest_categorical('random_state', [0, 25, 100, None]),\n",
        "       \" criterion\" : trial.suggest_int('min_samples_split', 1,10),\n",
        "        \"max_depth\" : trial.suggest_int(\"max_depth\", 2, 32, log=True),\n",
        "        \"n_estimators\" : trial.suggest_int(\"n_estimators\", 100,1000),\n",
        "        \"min_samples_leaf\" : trial.suggest_int(\"min_samples_leaf\", 1,10)\n",
        "        # 'bootstrap': trial.suggest_categorical('bootstrap' , [True, False]),\n",
        "        # 'max_features': trial.suggest_categorical(\"max_features\" , ['auto', 'sqrt'])\n",
        "    }\n",
        "\n",
        "    suggested_random_state = param['random_state']  # also use the suggested random state value in train_test_split()\n",
        "    \n",
        "    \n",
        "    X_train, X_test, y_train, y_test = train_test_split(X_mice, y, test_size=0.2, shuffle=True, random_state=suggested_random_state)\n",
        "    clf = RandomForestClassifier()\n",
        "\n",
        "    clf.fit(X_train, y_train)\n",
        "    y_pred = clf.predict(X_test)\n",
        "    acc = accuracy_score(y_pred, y_test)\n",
        "    f1 = f1_score(y_pred, y_test, average='weighted') # Calculate F1-score #average='weighted'\n",
        "    precision = precision_score(y_pred, y_test, average='weighted') # Calculate Precision\n",
        "    recall = recall_score(y_pred, y_test, average='weighted')  # Calculate Recall\n",
        "    print(f\"Model Accuracy: {round(acc, 6)}\")\n",
        "    print(f\"Model F1-score: {round(f1, 6)}\")\n",
        "    print(f\"Model precision-score: {round(precision, 6)}\")\n",
        "    print(f\"Model recall-score: {round(recall, 6)}\")\n",
        "\n",
        "    print(f\"Model Parameters: {param}\")\n",
        "    \n",
        "    return acc  # return our objective value\n",
        "    return f1\n",
        "    return precision\n",
        "    return recall\n",
        "\n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    study = optuna.create_study(\n",
        "        direction=\"maximize\",\n",
        "        sampler=optuna.samplers.TPESampler()\n",
        "    )\n",
        "    study.optimize(objective, n_trials=50)\n",
        "\n",
        "    print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
        "\n",
        "    print(\"Best trial:\")\n",
        "    trial = study.best_trial\n",
        "\n",
        "    print(\"  Value: {}\".format(trial.value))\n",
        "\n",
        "    print(\"  Params: \")\n",
        "    for key, value in trial.params.items():\n",
        "        print(\"    {}: {}\".format(key, value))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "i-TgfWsVg9EJ"
      },
      "source": [
        "Hypertuned ROC-AUC"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 140,
      "metadata": {
        "id": "5ojVztJ-g9Ue",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ff07b60f-fcd6-4033-8213-4a9fdf544b0b"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "- ROC_AUC of RFC: 0.9486\n",
            "10-fold CV mean of RFC: 0.9092\n",
            "10-fold CV std of RFC: 0.0440\n"
          ]
        }
      ],
      "source": [
        "mice_pipeline1 = Pipeline(steps=[\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])\n",
        "X_mice1= mice_pipeline1.fit_transform(X_full)\n",
        "X_train_MICE, X_test_MICE, y_train_MICE, y_test_MICE = train_test_split(X_mice1, y_full, test_size=0.2, random_state=42)\n",
        "\n",
        "model_rf_mice = RandomForestClassifier(random_state=  100,\n",
        "    min_samples_split= 3,\n",
        "    max_depth= 16,\n",
        "    n_estimators= 804,\n",
        "    min_samples_leaf= 6 ).fit(X_train_MICE, y_train_MICE)\n",
        "\n",
        "probs_rf_mice = model_rf_mice.predict_proba(X_test_MICE)#[:, 1]\n",
        "auc_rf1mice = roc_auc_score(y_test_MICE, probs_rf_mice, multi_class='ovr')\n",
        "\n",
        "print('- ROC_AUC of RFC: {:.4f}'.format(auc_rf1mice))\n",
        "\n",
        "\n",
        "### micetuned CV score\n",
        "RFC_mice = RandomForestClassifier(random_state= 100,\n",
        "    min_samples_split= 5,\n",
        "    max_depth= 23,\n",
        "    n_estimators= 784,\n",
        "    min_samples_leaf= 7  )\n",
        "\n",
        "scores_cv_rf= cross_val_score(RFC_mice, X_mice1, y_full, cv=10, scoring='roc_auc_ovr').mean()\n",
        "scores_cv_rf_std= cross_val_score(RFC_mice, X_mice1, y_full, cv=10, scoring='roc_auc_ovr').std()\n",
        "#print('10-fold CV of RFC', (scores_cv_rf))\n",
        "print('10-fold CV mean of RFC: {:.4f}'.format(scores_cv_rf))\n",
        "print('10-fold CV std of RFC: {:.4f}'.format(scores_cv_rf_std))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mHPzp3N6mkkD"
      },
      "source": [
        "### Experiment: 4 = Simple Imputer with Strategy Constant "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 142,
      "metadata": {
        "id": "_6TzxxAomkkD"
      },
      "outputs": [],
      "source": [
        "# Create a pipeline with simple imputer with strategy constant and fill value 0\n",
        "constant_pipeline = Pipeline(steps=[\n",
        "    ('Imputer', SimpleImputer(strategy='constant', fill_value=0)),\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 143,
      "metadata": {
        "id": "vztriY6kmkkD"
      },
      "outputs": [],
      "source": [
        "X_const =constant_pipeline.fit_transform(X)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 144,
      "metadata": {
        "id": "eGPMRNjjmkkE",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "3ac90223-98a9-46d5-c72f-438b0ab36052"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Random Forest\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8292\n",
            "- F1 score: 0.8283\n",
            "- Precision: 0.8354\n",
            "- Recall: 0.8292\n",
            "===================================\n",
            "\n",
            "\n",
            "Decision Tree\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7458\n",
            "- F1 score: 0.7468\n",
            "- Precision: 0.7517\n",
            "- Recall: 0.7458\n",
            "===================================\n",
            "\n",
            "\n",
            "K-Neighbors Classifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.7937\n",
            "- F1 score: 0.7927\n",
            "- Precision: 0.7933\n",
            "- Recall: 0.7937\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7042\n",
            "- F1 score: 0.7043\n",
            "- Precision: 0.7118\n",
            "- Recall: 0.7042\n",
            "===================================\n",
            "\n",
            "\n",
            "XGBClassifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.9458\n",
            "- F1 score: 0.9458\n",
            "- Precision: 0.9458\n",
            "- Recall: 0.9458\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8129\n",
            "- Precision: 0.8195\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "SVM\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.8031\n",
            "- F1 score: 0.8025\n",
            "- Precision: 0.8082\n",
            "- Recall: 0.8031\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7292\n",
            "- F1 score: 0.7286\n",
            "- Precision: 0.7373\n",
            "- Recall: 0.7292\n",
            "===================================\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# training the models\n",
        "report_const = evaluate_models(X_const, y, models)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "q7Xlqoib9cPq"
      },
      "source": [
        "#### Hyper-parameter tuning of RFC model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 146,
      "metadata": {
        "id": "jIHz1PGChmgk",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1e999bee-6fa2-4e31-ba83-c83884486a56"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:18,479]\u001b[0m A new study created in memory with name: no-name-b141eb3f-b972-4b27-bec0-f83292123edb\u001b[0m\n",
            "\u001b[32m[I 2023-02-19 13:15:18,797]\u001b[0m Trial 0 finished with value: 0.8208333333333333 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 908, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.8208333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.821547\n",
            "Model precision-score: 0.831436\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 9, 'n_estimators': 908, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:19,113]\u001b[0m Trial 1 finished with value: 0.8333333333333334 and parameters: {'random_state': None, 'min_samples_split': 6, 'max_depth': 17, 'n_estimators': 219, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.833333\n",
            "Model F1-score: 0.834837\n",
            "Model precision-score: 0.838982\n",
            "Model recall-score: 0.833333\n",
            "Model Parameters: {'random_state': None, ' criterion': 6, 'max_depth': 17, 'n_estimators': 219, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:19,422]\u001b[0m Trial 2 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 21, 'n_estimators': 779, 'min_samples_leaf': 8}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.781852\n",
            "Model precision-score: 0.78746\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 21, 'n_estimators': 779, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:19,729]\u001b[0m Trial 3 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 8, 'max_depth': 16, 'n_estimators': 394, 'min_samples_leaf': 8}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812484\n",
            "Model precision-score: 0.820466\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 8, 'max_depth': 16, 'n_estimators': 394, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:20,035]\u001b[0m Trial 4 finished with value: 0.8 and parameters: {'random_state': 0, 'min_samples_split': 4, 'max_depth': 4, 'n_estimators': 595, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799875\n",
            "Model precision-score: 0.805551\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 0, ' criterion': 4, 'max_depth': 4, 'n_estimators': 595, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:20,418]\u001b[0m Trial 5 finished with value: 0.825 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 20, 'n_estimators': 549, 'min_samples_leaf': 7}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.825\n",
            "Model F1-score: 0.82584\n",
            "Model precision-score: 0.829859\n",
            "Model recall-score: 0.825\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 20, 'n_estimators': 549, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:20,886]\u001b[0m Trial 6 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 10, 'max_depth': 20, 'n_estimators': 700, 'min_samples_leaf': 10}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812188\n",
            "Model precision-score: 0.819291\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 10, 'max_depth': 20, 'n_estimators': 700, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:21,356]\u001b[0m Trial 7 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 6, 'max_depth': 26, 'n_estimators': 160, 'min_samples_leaf': 6}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820294\n",
            "Model precision-score: 0.826238\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 6, 'max_depth': 26, 'n_estimators': 160, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:21,823]\u001b[0m Trial 8 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 7, 'max_depth': 30, 'n_estimators': 136, 'min_samples_leaf': 9}. Best is trial 1 with value: 0.8333333333333334.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.783022\n",
            "Model precision-score: 0.788968\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 7, 'max_depth': 30, 'n_estimators': 136, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:22,293]\u001b[0m Trial 9 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 490, 'min_samples_leaf': 6}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861049\n",
            "Model precision-score: 0.865039\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 2, 'n_estimators': 490, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:22,800]\u001b[0m Trial 10 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 2, 'n_estimators': 432, 'min_samples_leaf': 1}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853001\n",
            "Model precision-score: 0.855192\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 2, 'n_estimators': 432, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:23,301]\u001b[0m Trial 11 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 2, 'n_estimators': 396, 'min_samples_leaf': 1}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.844162\n",
            "Model precision-score: 0.847973\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 2, 'n_estimators': 396, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:23,749]\u001b[0m Trial 12 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 394, 'min_samples_leaf': 1}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.8614\n",
            "Model precision-score: 0.863481\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 2, 'n_estimators': 394, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:24,074]\u001b[0m Trial 13 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 3, 'n_estimators': 289, 'min_samples_leaf': 5}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861687\n",
            "Model precision-score: 0.862882\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 3, 'n_estimators': 289, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:24,398]\u001b[0m Trial 14 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 4, 'n_estimators': 505, 'min_samples_leaf': 5}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857614\n",
            "Model precision-score: 0.858859\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 4, 'n_estimators': 505, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:24,725]\u001b[0m Trial 15 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 673, 'min_samples_leaf': 3}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853144\n",
            "Model precision-score: 0.85605\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 2, 'n_estimators': 673, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:25,046]\u001b[0m Trial 16 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 3, 'n_estimators': 296, 'min_samples_leaf': 6}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.862172\n",
            "Model precision-score: 0.865611\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 3, 'n_estimators': 296, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:25,385]\u001b[0m Trial 17 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 6, 'n_estimators': 989, 'min_samples_leaf': 2}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853016\n",
            "Model precision-score: 0.855888\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 6, 'n_estimators': 989, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:25,705]\u001b[0m Trial 18 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 2, 'n_estimators': 465, 'min_samples_leaf': 7}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848693\n",
            "Model precision-score: 0.852398\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 2, 'n_estimators': 465, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:26,143]\u001b[0m Trial 19 finished with value: 0.7875 and parameters: {'random_state': 25, 'min_samples_split': 2, 'max_depth': 3, 'n_estimators': 275, 'min_samples_leaf': 4}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.7875\n",
            "Model F1-score: 0.787213\n",
            "Model precision-score: 0.790851\n",
            "Model recall-score: 0.7875\n",
            "Model Parameters: {'random_state': 25, ' criterion': 2, 'max_depth': 3, 'n_estimators': 275, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:26,519]\u001b[0m Trial 20 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 7, 'n_estimators': 588, 'min_samples_leaf': 4}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857002\n",
            "Model precision-score: 0.858231\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 7, 'n_estimators': 588, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:26,853]\u001b[0m Trial 21 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 3, 'n_estimators': 331, 'min_samples_leaf': 5}. Best is trial 9 with value: 0.8625.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.84056\n",
            "Model precision-score: 0.842568\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 3, 'n_estimators': 331, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:27,176]\u001b[0m Trial 22 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 365, 'min_samples_leaf': 7}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865846\n",
            "Model precision-score: 0.867624\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 2, 'n_estimators': 365, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:27,514]\u001b[0m Trial 23 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 2, 'n_estimators': 346, 'min_samples_leaf': 7}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848073\n",
            "Model precision-score: 0.851963\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 2, 'n_estimators': 346, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:27,840]\u001b[0m Trial 24 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 2, 'n_estimators': 519, 'min_samples_leaf': 6}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.85786\n",
            "Model precision-score: 0.860344\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 2, 'n_estimators': 519, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:28,164]\u001b[0m Trial 25 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 4, 'n_estimators': 231, 'min_samples_leaf': 8}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852471\n",
            "Model precision-score: 0.856483\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 4, 'n_estimators': 231, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:28,503]\u001b[0m Trial 26 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 3, 'n_estimators': 446, 'min_samples_leaf': 10}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.840361\n",
            "Model precision-score: 0.844063\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 3, 'n_estimators': 446, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:28,828]\u001b[0m Trial 27 finished with value: 0.7791666666666667 and parameters: {'random_state': 25, 'min_samples_split': 3, 'max_depth': 2, 'n_estimators': 693, 'min_samples_leaf': 9}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.779167\n",
            "Model F1-score: 0.778123\n",
            "Model precision-score: 0.782635\n",
            "Model recall-score: 0.779167\n",
            "Model Parameters: {'random_state': 25, ' criterion': 3, 'max_depth': 2, 'n_estimators': 693, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:29,157]\u001b[0m Trial 28 finished with value: 0.8125 and parameters: {'random_state': None, 'min_samples_split': 10, 'max_depth': 5, 'n_estimators': 387, 'min_samples_leaf': 7}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812283\n",
            "Model precision-score: 0.819525\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': None, ' criterion': 10, 'max_depth': 5, 'n_estimators': 387, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:29,499]\u001b[0m Trial 29 finished with value: 0.8416666666666667 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 780, 'min_samples_leaf': 2}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.841584\n",
            "Model precision-score: 0.846254\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 9, 'n_estimators': 780, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:29,825]\u001b[0m Trial 30 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 11, 'n_estimators': 217, 'min_samples_leaf': 6}. Best is trial 22 with value: 0.8666666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857151\n",
            "Model precision-score: 0.860237\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 11, 'n_estimators': 217, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:30,152]\u001b[0m Trial 31 finished with value: 0.875 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 3, 'n_estimators': 347, 'min_samples_leaf': 5}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.875\n",
            "Model F1-score: 0.874163\n",
            "Model precision-score: 0.876507\n",
            "Model recall-score: 0.875\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 3, 'n_estimators': 347, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:30,489]\u001b[0m Trial 32 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 3, 'n_estimators': 477, 'min_samples_leaf': 2}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.869634\n",
            "Model precision-score: 0.872587\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 3, 'n_estimators': 477, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:30,815]\u001b[0m Trial 33 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 3, 'n_estimators': 490, 'min_samples_leaf': 2}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857546\n",
            "Model precision-score: 0.861553\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 3, 'n_estimators': 490, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:31,141]\u001b[0m Trial 34 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 4, 'n_estimators': 566, 'min_samples_leaf': 3}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856561\n",
            "Model precision-score: 0.860302\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 4, 'n_estimators': 566, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:31,476]\u001b[0m Trial 35 finished with value: 0.8083333333333333 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 5, 'n_estimators': 629, 'min_samples_leaf': 8}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.808333\n",
            "Model F1-score: 0.809421\n",
            "Model precision-score: 0.822804\n",
            "Model recall-score: 0.808333\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 5, 'n_estimators': 629, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:31,799]\u001b[0m Trial 36 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 2, 'max_depth': 3, 'n_estimators': 338, 'min_samples_leaf': 4}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.81254\n",
            "Model precision-score: 0.819684\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 2, 'max_depth': 3, 'n_estimators': 338, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:32,131]\u001b[0m Trial 37 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 5, 'max_depth': 15, 'n_estimators': 437, 'min_samples_leaf': 5}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.790746\n",
            "Model precision-score: 0.796382\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 5, 'max_depth': 15, 'n_estimators': 437, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:32,471]\u001b[0m Trial 38 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 5, 'n_estimators': 770, 'min_samples_leaf': 7}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.869344\n",
            "Model precision-score: 0.871887\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 5, 'n_estimators': 770, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:32,796]\u001b[0m Trial 39 finished with value: 0.8041666666666667 and parameters: {'random_state': 0, 'min_samples_split': 1, 'max_depth': 5, 'n_estimators': 847, 'min_samples_leaf': 8}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.803858\n",
            "Model precision-score: 0.810963\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 0, ' criterion': 1, 'max_depth': 5, 'n_estimators': 847, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:33,130]\u001b[0m Trial 40 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 6, 'n_estimators': 770, 'min_samples_leaf': 9}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.8568\n",
            "Model precision-score: 0.863078\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 6, 'n_estimators': 770, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:33,464]\u001b[0m Trial 41 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 4, 'n_estimators': 542, 'min_samples_leaf': 7}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861867\n",
            "Model precision-score: 0.865757\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 4, 'n_estimators': 542, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:33,849]\u001b[0m Trial 42 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 3, 'n_estimators': 630, 'min_samples_leaf': 6}. Best is trial 31 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.869779\n",
            "Model precision-score: 0.874928\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 3, 'n_estimators': 630, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:34,367]\u001b[0m Trial 43 finished with value: 0.8791666666666667 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 4, 'n_estimators': 625, 'min_samples_leaf': 7}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.879167\n",
            "Model F1-score: 0.878051\n",
            "Model precision-score: 0.880251\n",
            "Model recall-score: 0.879167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 4, 'n_estimators': 625, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:34,888]\u001b[0m Trial 44 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 4, 'n_estimators': 631, 'min_samples_leaf': 8}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.85305\n",
            "Model precision-score: 0.855694\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 4, 'n_estimators': 631, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:35,421]\u001b[0m Trial 45 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 4, 'max_depth': 5, 'n_estimators': 788, 'min_samples_leaf': 6}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812171\n",
            "Model precision-score: 0.820454\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 4, 'max_depth': 5, 'n_estimators': 788, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:35,938]\u001b[0m Trial 46 finished with value: 0.8416666666666667 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 4, 'n_estimators': 715, 'min_samples_leaf': 7}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.841703\n",
            "Model precision-score: 0.853306\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 4, 'n_estimators': 715, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:36,458]\u001b[0m Trial 47 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 6, 'max_depth': 7, 'n_estimators': 864, 'min_samples_leaf': 4}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848641\n",
            "Model precision-score: 0.85062\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 6, 'max_depth': 7, 'n_estimators': 864, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:36,992]\u001b[0m Trial 48 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 3, 'n_estimators': 734, 'min_samples_leaf': 5}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856591\n",
            "Model precision-score: 0.860982\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 3, 'n_estimators': 734, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:15:37,343]\u001b[0m Trial 49 finished with value: 0.8 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 6, 'n_estimators': 654, 'min_samples_leaf': 6}. Best is trial 43 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799179\n",
            "Model precision-score: 0.804998\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 6, 'n_estimators': 654, 'min_samples_leaf': 6}\n",
            "Number of finished trials: 50\n",
            "Best trial:\n",
            "  Value: 0.8791666666666667\n",
            "  Params: \n",
            "    random_state: 100\n",
            "    min_samples_split: 4\n",
            "    max_depth: 4\n",
            "    n_estimators: 625\n",
            "    min_samples_leaf: 7\n"
          ]
        }
      ],
      "source": [
        "import optuna\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: hold-out accuracy of a Random Forest built from the sampled hyperparameters.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    trial : optuna.trial.Trial\n",
        "        Trial object used to sample the hyperparameters.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    float\n",
        "        Accuracy on the 20% hold-out split; this is the value the study optimizes.\n",
        "    \"\"\"\n",
        "    param = {\n",
        "        'random_state': trial.suggest_categorical('random_state', [0, 25, 100, None]),\n",
        "        # scikit-learn rejects an integer min_samples_split below 2, so the range starts at 2\n",
        "        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),\n",
        "        'max_depth': trial.suggest_int('max_depth', 2, 32, log=True),\n",
        "        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),\n",
        "        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),\n",
        "        # 'bootstrap': trial.suggest_categorical('bootstrap' , [True, False]),\n",
        "        # 'max_features': trial.suggest_categorical(\"max_features\" , ['auto', 'sqrt'])\n",
        "    }\n",
        "\n",
        "    # The sampled seed is shared by the train/test split and the forest.\n",
        "    # NOTE(review): since the seed also selects the split, the study can reward a favorable\n",
        "    # split instead of better hyperparameters - consider a fixed split seed.\n",
        "    X_train, X_test, y_train, y_test = train_test_split(\n",
        "        X_const, y, test_size=0.2, shuffle=True, random_state=param['random_state']\n",
        "    )\n",
        "\n",
        "    # Build the forest from the sampled values so each trial evaluates its own configuration\n",
        "    clf = RandomForestClassifier(**param)\n",
        "    clf.fit(X_train, y_train)\n",
        "    y_pred = clf.predict(X_test)\n",
        "\n",
        "    # scikit-learn metrics expect (y_true, y_pred) in that order\n",
        "    acc = accuracy_score(y_test, y_pred)\n",
        "    f1 = f1_score(y_test, y_pred, average='weighted')\n",
        "    precision = precision_score(y_test, y_pred, average='weighted')\n",
        "    recall = recall_score(y_test, y_pred, average='weighted')\n",
        "    print(f\"Model Accuracy: {round(acc, 6)}\")\n",
        "    print(f\"Model F1-score: {round(f1, 6)}\")\n",
        "    print(f\"Model precision-score: {round(precision, 6)}\")\n",
        "    print(f\"Model recall-score: {round(recall, 6)}\")\n",
        "\n",
        "    print(f\"Model Parameters: {param}\")\n",
        "\n",
        "    return acc  # objective value: hold-out accuracy\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    # Search for the accuracy-maximizing configuration with Optuna's TPE sampler.\n",
        "    tpe_sampler = optuna.samplers.TPESampler()\n",
        "    study = optuna.create_study(direction=\"maximize\", sampler=tpe_sampler)\n",
        "    study.optimize(objective, n_trials=50)\n",
        "\n",
        "    print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
        "\n",
        "    # Report the best trial: its objective value, then every sampled parameter.\n",
        "    print(\"Best trial:\")\n",
        "    trial = study.best_trial\n",
        "    print(\"  Value: {}\".format(trial.value))\n",
        "    print(\"  Params: \")\n",
        "    for key, value in trial.params.items():\n",
        "        print(\"    {}: {}\".format(key, value))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 147,
      "metadata": {
        "id": "ji13w0mGi4a6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5f4b8016-e3d9-49cd-81e8-16834f2b3962"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "- ROC_AUC of RFC: 0.9553\n",
            "10-fold CV mean of RFC: 0.9098\n",
            "10-fold CV std of RFC: 0.0435\n"
          ]
        }
      ],
      "source": [
        "# Constant-fill imputation (missing -> 0) followed by robust scaling.\n",
        "constant_pipeline1 = Pipeline(steps=[\n",
        "    ('Imputer', SimpleImputer(strategy='constant', fill_value=0)),\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])\n",
        "\n",
        "# NOTE(review): the pipeline is fitted on all of X_full before the train/test split - confirm this is intended.\n",
        "X_constan1 = constant_pipeline1.fit_transform(X_full)\n",
        "X_train_constan, X_test_constan, y_train_constan, y_test_constan = train_test_split(\n",
        "    X_constan1, y_full, test_size=0.2, random_state=42\n",
        ")\n",
        "\n",
        "# Hold-out evaluation of a Random Forest with fixed hyperparameters.\n",
        "# NOTE(review): these values differ from the ones given to the cross-validated model below -\n",
        "# confirm which set is the intended tuned configuration.\n",
        "model_rf_constan = RandomForestClassifier(\n",
        "    random_state=100,\n",
        "    min_samples_split=10,\n",
        "    max_depth=16,\n",
        "    n_estimators=786,\n",
        "    min_samples_leaf=2,\n",
        ").fit(X_train_constan, y_train_constan)\n",
        "\n",
        "# Pass the full probability matrix (all class columns) to the one-vs-rest ROC AUC\n",
        "probs_rf_constan = model_rf_constan.predict_proba(X_test_constan)\n",
        "auc_rf1constan = roc_auc_score(y_test_constan, probs_rf_constan, multi_class='ovr')\n",
        "\n",
        "print('- ROC_AUC of RFC: {:.4f}'.format(auc_rf1constan))\n",
        "\n",
        "\n",
        "# 10-fold cross-validated one-vs-rest ROC AUC\n",
        "RFC_constan = RandomForestClassifier(\n",
        "    random_state=100,\n",
        "    min_samples_split=9,\n",
        "    max_depth=9,\n",
        "    n_estimators=708,\n",
        "    min_samples_leaf=6,\n",
        ")\n",
        "\n",
        "# Run cross-validation once and derive both statistics from the same fold scores\n",
        "scores_cv_rf_folds = cross_val_score(RFC_constan, X_constan1, y_full, cv=10, scoring='roc_auc_ovr')\n",
        "scores_cv_rf = scores_cv_rf_folds.mean()\n",
        "scores_cv_rf_std = scores_cv_rf_folds.std()\n",
        "print('10-fold CV mean of RFC: {:.4f}'.format(scores_cv_rf))\n",
        "print('10-fold CV std of RFC: {:.4f}'.format(scores_cv_rf_std))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "00cLvBTlmkkE"
      },
      "source": [
        "## Experiment: 5 = Simple Imputer with Strategy Mean \n",
        "\n",
        "- Another strategy which can be used is replacing missing values with mean\n",
        "- Here we replace the missing values with the mean of the column"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 148,
      "metadata": {
        "id": "kPxglohumkkE"
      },
      "outputs": [],
      "source": [
        "# Preprocessing for this experiment: mean imputation, then robust scaling\n",
        "mean_pipeline = Pipeline(\n",
        "    steps=[\n",
        "        ('Imputer', SimpleImputer(strategy='mean')),\n",
        "        ('RobustScaler', RobustScaler()),\n",
        "    ]\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 149,
      "metadata": {
        "id": "f1Kv0g93mkkE"
      },
      "outputs": [],
      "source": [
        "# Fit the mean-imputation pipeline on X and transform it in one step.\n",
        "# NOTE(review): the pipeline is fitted on all of X here; confirm that any later train/test split makes this acceptable.\n",
        "X_mean = mean_pipeline.fit_transform(X)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 150,
      "metadata": {
        "id": "OiW9jYe7mkkE",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "0428fd77-8eae-4412-d10a-628e36a32715"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Random Forest\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8333\n",
            "- F1 score: 0.8329\n",
            "- Precision: 0.8419\n",
            "- Recall: 0.8333\n",
            "===================================\n",
            "\n",
            "\n",
            "Decision Tree\n",
            "Model performance for Training set\n",
            "- Accuracy: 1.0000\n",
            "- F1 score: 1.0000\n",
            "- Precision: 1.0000\n",
            "- Recall: 1.0000\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7583\n",
            "- F1 score: 0.7580\n",
            "- Precision: 0.7617\n",
            "- Recall: 0.7583\n",
            "===================================\n",
            "\n",
            "\n",
            "K-Neighbors Classifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.7937\n",
            "- F1 score: 0.7927\n",
            "- Precision: 0.7933\n",
            "- Recall: 0.7937\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7042\n",
            "- F1 score: 0.7043\n",
            "- Precision: 0.7118\n",
            "- Recall: 0.7042\n",
            "===================================\n",
            "\n",
            "\n",
            "XGBClassifier\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.9458\n",
            "- F1 score: 0.9458\n",
            "- Precision: 0.9458\n",
            "- Recall: 0.9458\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.8125\n",
            "- F1 score: 0.8129\n",
            "- Precision: 0.8195\n",
            "- Recall: 0.8125\n",
            "===================================\n",
            "\n",
            "\n",
            "SVM\n",
            "Model performance for Training set\n",
            "- Accuracy: 0.8031\n",
            "- F1 score: 0.8025\n",
            "- Precision: 0.8082\n",
            "- Recall: 0.8031\n",
            "----------------------------------\n",
            "Model performance for Test set\n",
            "- Accuracy: 0.7292\n",
            "- F1 score: 0.7286\n",
            "- Precision: 0.7373\n",
            "- Recall: 0.7292\n",
            "===================================\n",
            "\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Train and evaluate every model in `models` on the mean-imputed features.\n",
        "# evaluate_models is defined elsewhere in the notebook; the cell output shows per-model train/test metrics.\n",
        "report_mean = evaluate_models(X_mean, y, models)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 151,
      "metadata": {
        "id": "JD6l6Pgpzxpg"
      },
      "outputs": [],
      "source": [
        "# Mean imputation + robust scaling, fitted on X_full\n",
        "mean_pipeline1 = Pipeline(\n",
        "    steps=[\n",
        "        ('Imputer', SimpleImputer(strategy='mean')),\n",
        "        ('RobustScaler', RobustScaler()),\n",
        "    ]\n",
        ")\n",
        "X_mean1 = mean_pipeline1.fit_transform(X_full)\n",
        "\n",
        "# 80/20 hold-out split with a fixed seed, then a default Random Forest fitted on the training part\n",
        "X_train_mean1, X_test_mean1, y_train_mean1, y_test_mean1 = train_test_split(\n",
        "    X_mean1, y_full, test_size=0.2, random_state=42\n",
        ")\n",
        "model_rf_mean1 = RandomForestClassifier()\n",
        "model_rf_mean1.fit(X_train_mean1, y_train_mean1)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZDhSlK8i9kP0"
      },
      "source": [
        "#### Hyper-parameter tuning of RFC model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 152,
      "metadata": {
        "id": "qtBjTu1Ehvcf",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "8af8e48a-95ae-478b-b858-4daf8fc1abdd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:35,576]\u001b[0m A new study created in memory with name: no-name-77d76c85-130f-45dc-959b-714416688d5c\u001b[0m\n",
            "\u001b[32m[I 2023-02-19 13:16:35,886]\u001b[0m Trial 0 finished with value: 0.8 and parameters: {'random_state': 25, 'min_samples_split': 7, 'max_depth': 2, 'n_estimators': 965, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.8.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799338\n",
            "Model precision-score: 0.80341\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 25, ' criterion': 7, 'max_depth': 2, 'n_estimators': 965, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:36,193]\u001b[0m Trial 1 finished with value: 0.8166666666666667 and parameters: {'random_state': 0, 'min_samples_split': 5, 'max_depth': 6, 'n_estimators': 277, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.8166666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.816667\n",
            "Model F1-score: 0.8162\n",
            "Model precision-score: 0.823599\n",
            "Model recall-score: 0.816667\n",
            "Model Parameters: {'random_state': 0, ' criterion': 5, 'max_depth': 6, 'n_estimators': 277, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:36,507]\u001b[0m Trial 2 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 8, 'n_estimators': 222, 'min_samples_leaf': 8}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857446\n",
            "Model precision-score: 0.858785\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 8, 'n_estimators': 222, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:36,830]\u001b[0m Trial 3 finished with value: 0.7958333333333333 and parameters: {'random_state': 25, 'min_samples_split': 9, 'max_depth': 2, 'n_estimators': 530, 'min_samples_leaf': 6}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.795833\n",
            "Model F1-score: 0.79602\n",
            "Model precision-score: 0.804194\n",
            "Model recall-score: 0.795833\n",
            "Model Parameters: {'random_state': 25, ' criterion': 9, 'max_depth': 2, 'n_estimators': 530, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:37,146]\u001b[0m Trial 4 finished with value: 0.8 and parameters: {'random_state': None, 'min_samples_split': 6, 'max_depth': 10, 'n_estimators': 333, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.799304\n",
            "Model precision-score: 0.806155\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': None, ' criterion': 6, 'max_depth': 10, 'n_estimators': 333, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:37,477]\u001b[0m Trial 5 finished with value: 0.7833333333333333 and parameters: {'random_state': 25, 'min_samples_split': 8, 'max_depth': 15, 'n_estimators': 997, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.783333\n",
            "Model F1-score: 0.782625\n",
            "Model precision-score: 0.789128\n",
            "Model recall-score: 0.783333\n",
            "Model Parameters: {'random_state': 25, ' criterion': 8, 'max_depth': 15, 'n_estimators': 997, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:37,786]\u001b[0m Trial 6 finished with value: 0.8125 and parameters: {'random_state': 0, 'min_samples_split': 7, 'max_depth': 3, 'n_estimators': 649, 'min_samples_leaf': 10}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8125\n",
            "Model F1-score: 0.812198\n",
            "Model precision-score: 0.819317\n",
            "Model recall-score: 0.8125\n",
            "Model Parameters: {'random_state': 0, ' criterion': 7, 'max_depth': 3, 'n_estimators': 649, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:38,091]\u001b[0m Trial 7 finished with value: 0.8041666666666667 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 4, 'n_estimators': 300, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.804167\n",
            "Model F1-score: 0.80329\n",
            "Model precision-score: 0.807387\n",
            "Model recall-score: 0.804167\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 4, 'n_estimators': 300, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:38,404]\u001b[0m Trial 8 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 7, 'max_depth': 3, 'n_estimators': 591, 'min_samples_leaf': 8}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.840125\n",
            "Model precision-score: 0.844539\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 7, 'max_depth': 3, 'n_estimators': 591, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:38,715]\u001b[0m Trial 9 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 10, 'max_depth': 2, 'n_estimators': 159, 'min_samples_leaf': 4}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.857151\n",
            "Model precision-score: 0.860237\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 10, 'max_depth': 2, 'n_estimators': 159, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:39,049]\u001b[0m Trial 10 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 32, 'n_estimators': 114, 'min_samples_leaf': 7}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.840375\n",
            "Model precision-score: 0.84251\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 32, 'n_estimators': 114, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:39,382]\u001b[0m Trial 11 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 6, 'n_estimators': 108, 'min_samples_leaf': 10}. Best is trial 2 with value: 0.8583333333333333.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.836259\n",
            "Model precision-score: 0.840078\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 6, 'n_estimators': 108, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:39,715]\u001b[0m Trial 12 finished with value: 0.875 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 9, 'n_estimators': 423, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.875\n",
            "Model F1-score: 0.874443\n",
            "Model precision-score: 0.877299\n",
            "Model recall-score: 0.875\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 9, 'n_estimators': 423, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:40,043]\u001b[0m Trial 13 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 11, 'n_estimators': 465, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.865872\n",
            "Model precision-score: 0.868879\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 11, 'n_estimators': 465, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:40,369]\u001b[0m Trial 14 finished with value: 0.8458333333333333 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 13, 'n_estimators': 456, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.846093\n",
            "Model precision-score: 0.84769\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 13, 'n_estimators': 456, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:40,716]\u001b[0m Trial 15 finished with value: 0.8666666666666667 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 18, 'n_estimators': 762, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.866667\n",
            "Model F1-score: 0.866238\n",
            "Model precision-score: 0.867718\n",
            "Model recall-score: 0.866667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 18, 'n_estimators': 762, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:41,034]\u001b[0m Trial 16 finished with value: 0.8458333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 418, 'min_samples_leaf': 6}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.845833\n",
            "Model F1-score: 0.844752\n",
            "Model precision-score: 0.845887\n",
            "Model recall-score: 0.845833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 9, 'n_estimators': 418, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:41,489]\u001b[0m Trial 17 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 6, 'n_estimators': 711, 'min_samples_leaf': 7}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856917\n",
            "Model precision-score: 0.861032\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 6, 'n_estimators': 711, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:42,007]\u001b[0m Trial 18 finished with value: 0.775 and parameters: {'random_state': None, 'min_samples_split': 4, 'max_depth': 11, 'n_estimators': 420, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.775\n",
            "Model F1-score: 0.775823\n",
            "Model precision-score: 0.789195\n",
            "Model recall-score: 0.775\n",
            "Model Parameters: {'random_state': None, ' criterion': 4, 'max_depth': 11, 'n_estimators': 420, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:42,518]\u001b[0m Trial 19 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 2, 'max_depth': 20, 'n_estimators': 838, 'min_samples_leaf': 7}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820349\n",
            "Model precision-score: 0.828135\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 2, 'max_depth': 20, 'n_estimators': 838, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:43,025]\u001b[0m Trial 20 finished with value: 0.8708333333333333 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 526, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.870833\n",
            "Model F1-score: 0.870226\n",
            "Model precision-score: 0.873439\n",
            "Model recall-score: 0.870833\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 9, 'n_estimators': 526, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:43,544]\u001b[0m Trial 21 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 7, 'n_estimators': 553, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.862085\n",
            "Model precision-score: 0.864278\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 7, 'n_estimators': 553, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:44,069]\u001b[0m Trial 22 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 12, 'n_estimators': 491, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.84888\n",
            "Model precision-score: 0.851117\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 12, 'n_estimators': 491, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:44,582]\u001b[0m Trial 23 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 9, 'n_estimators': 392, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856902\n",
            "Model precision-score: 0.859578\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 9, 'n_estimators': 392, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:44,913]\u001b[0m Trial 24 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 8, 'n_estimators': 629, 'min_samples_leaf': 6}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848505\n",
            "Model precision-score: 0.853323\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 8, 'n_estimators': 629, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:45,262]\u001b[0m Trial 25 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 15, 'n_estimators': 369, 'min_samples_leaf': 7}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853705\n",
            "Model precision-score: 0.855997\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 15, 'n_estimators': 369, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:45,591]\u001b[0m Trial 26 finished with value: 0.825 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 11, 'n_estimators': 494, 'min_samples_leaf': 1}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.825\n",
            "Model F1-score: 0.823842\n",
            "Model precision-score: 0.826705\n",
            "Model recall-score: 0.825\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 11, 'n_estimators': 494, 'min_samples_leaf': 1}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:45,926]\u001b[0m Trial 27 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 5, 'max_depth': 5, 'n_estimators': 697, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.821072\n",
            "Model precision-score: 0.83141\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 5, 'max_depth': 5, 'n_estimators': 697, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:46,251]\u001b[0m Trial 28 finished with value: 0.7916666666666666 and parameters: {'random_state': None, 'min_samples_split': 2, 'max_depth': 8, 'n_estimators': 244, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.790229\n",
            "Model precision-score: 0.798044\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': None, ' criterion': 2, 'max_depth': 8, 'n_estimators': 244, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:46,585]\u001b[0m Trial 29 finished with value: 0.7875 and parameters: {'random_state': 25, 'min_samples_split': 4, 'max_depth': 10, 'n_estimators': 914, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.7875\n",
            "Model F1-score: 0.787848\n",
            "Model precision-score: 0.796759\n",
            "Model recall-score: 0.7875\n",
            "Model Parameters: {'random_state': 25, ' criterion': 4, 'max_depth': 10, 'n_estimators': 914, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:46,918]\u001b[0m Trial 30 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 14, 'n_estimators': 564, 'min_samples_leaf': 6}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853142\n",
            "Model precision-score: 0.855665\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 14, 'n_estimators': 564, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:47,247]\u001b[0m Trial 31 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 18, 'n_estimators': 806, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861388\n",
            "Model precision-score: 0.866052\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 18, 'n_estimators': 806, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:47,580]\u001b[0m Trial 32 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 2, 'max_depth': 19, 'n_estimators': 806, 'min_samples_leaf': 10}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.85669\n",
            "Model precision-score: 0.862292\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 2, 'max_depth': 19, 'n_estimators': 806, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:47,925]\u001b[0m Trial 33 finished with value: 0.8583333333333333 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 12, 'n_estimators': 724, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.858333\n",
            "Model F1-score: 0.856649\n",
            "Model precision-score: 0.861843\n",
            "Model recall-score: 0.858333\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 12, 'n_estimators': 724, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:48,258]\u001b[0m Trial 34 finished with value: 0.8375 and parameters: {'random_state': 100, 'min_samples_split': 3, 'max_depth': 7, 'n_estimators': 479, 'min_samples_leaf': 9}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8375\n",
            "Model F1-score: 0.83572\n",
            "Model precision-score: 0.839219\n",
            "Model recall-score: 0.8375\n",
            "Model Parameters: {'random_state': 100, ' criterion': 3, 'max_depth': 7, 'n_estimators': 479, 'min_samples_leaf': 9}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:48,595]\u001b[0m Trial 35 finished with value: 0.8208333333333333 and parameters: {'random_state': 0, 'min_samples_split': 4, 'max_depth': 9, 'n_estimators': 339, 'min_samples_leaf': 7}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.820833\n",
            "Model F1-score: 0.820087\n",
            "Model precision-score: 0.826827\n",
            "Model recall-score: 0.820833\n",
            "Model Parameters: {'random_state': 0, ' criterion': 4, 'max_depth': 9, 'n_estimators': 339, 'min_samples_leaf': 7}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:48,933]\u001b[0m Trial 36 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 16, 'n_estimators': 602, 'min_samples_leaf': 3}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.848703\n",
            "Model precision-score: 0.850464\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 16, 'n_estimators': 602, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:49,522]\u001b[0m Trial 37 finished with value: 0.7916666666666666 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 12, 'n_estimators': 533, 'min_samples_leaf': 4}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.791667\n",
            "Model F1-score: 0.790784\n",
            "Model precision-score: 0.795775\n",
            "Model recall-score: 0.791667\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 12, 'n_estimators': 533, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:50,113]\u001b[0m Trial 38 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 1, 'max_depth': 21, 'n_estimators': 667, 'min_samples_leaf': 10}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.852963\n",
            "Model precision-score: 0.856068\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 1, 'max_depth': 21, 'n_estimators': 667, 'min_samples_leaf': 10}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:50,451]\u001b[0m Trial 39 finished with value: 0.85 and parameters: {'random_state': None, 'min_samples_split': 3, 'max_depth': 10, 'n_estimators': 931, 'min_samples_leaf': 8}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.84997\n",
            "Model precision-score: 0.850288\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': None, ' criterion': 3, 'max_depth': 10, 'n_estimators': 931, 'min_samples_leaf': 8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:50,780]\u001b[0m Trial 40 finished with value: 0.8 and parameters: {'random_state': 25, 'min_samples_split': 6, 'max_depth': 14, 'n_estimators': 294, 'min_samples_leaf': 6}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8\n",
            "Model F1-score: 0.79921\n",
            "Model precision-score: 0.803391\n",
            "Model recall-score: 0.8\n",
            "Model Parameters: {'random_state': 25, ' criterion': 6, 'max_depth': 14, 'n_estimators': 294, 'min_samples_leaf': 6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:51,126]\u001b[0m Trial 41 finished with value: 0.875 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 7, 'n_estimators': 546, 'min_samples_leaf': 5}. Best is trial 12 with value: 0.875.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.875\n",
            "Model F1-score: 0.874425\n",
            "Model precision-score: 0.875144\n",
            "Model recall-score: 0.875\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 7, 'n_estimators': 546, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:51,462]\u001b[0m Trial 42 finished with value: 0.8791666666666667 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 7, 'n_estimators': 445, 'min_samples_leaf': 5}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.879167\n",
            "Model F1-score: 0.878172\n",
            "Model precision-score: 0.88179\n",
            "Model recall-score: 0.879167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 7, 'n_estimators': 445, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:51,787]\u001b[0m Trial 43 finished with value: 0.8416666666666667 and parameters: {'random_state': 100, 'min_samples_split': 4, 'max_depth': 5, 'n_estimators': 457, 'min_samples_leaf': 5}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.841667\n",
            "Model F1-score: 0.84003\n",
            "Model precision-score: 0.841819\n",
            "Model recall-score: 0.841667\n",
            "Model Parameters: {'random_state': 100, ' criterion': 4, 'max_depth': 5, 'n_estimators': 457, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:52,147]\u001b[0m Trial 44 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 5, 'max_depth': 7, 'n_estimators': 514, 'min_samples_leaf': 4}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.849162\n",
            "Model precision-score: 0.851234\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 5, 'max_depth': 7, 'n_estimators': 514, 'min_samples_leaf': 4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:52,484]\u001b[0m Trial 45 finished with value: 0.8791666666666667 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 8, 'n_estimators': 439, 'min_samples_leaf': 3}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.879167\n",
            "Model F1-score: 0.878249\n",
            "Model precision-score: 0.879455\n",
            "Model recall-score: 0.879167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 8, 'n_estimators': 439, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:52,812]\u001b[0m Trial 46 finished with value: 0.85 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 8, 'n_estimators': 330, 'min_samples_leaf': 3}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.85\n",
            "Model F1-score: 0.84887\n",
            "Model precision-score: 0.850343\n",
            "Model recall-score: 0.85\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 8, 'n_estimators': 330, 'min_samples_leaf': 3}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:53,162]\u001b[0m Trial 47 finished with value: 0.825 and parameters: {'random_state': 0, 'min_samples_split': 10, 'max_depth': 6, 'n_estimators': 419, 'min_samples_leaf': 2}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.825\n",
            "Model F1-score: 0.824782\n",
            "Model precision-score: 0.829113\n",
            "Model recall-score: 0.825\n",
            "Model Parameters: {'random_state': 0, ' criterion': 10, 'max_depth': 6, 'n_estimators': 419, 'min_samples_leaf': 2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:53,504]\u001b[0m Trial 48 finished with value: 0.8541666666666666 and parameters: {'random_state': 100, 'min_samples_split': 8, 'max_depth': 5, 'n_estimators': 588, 'min_samples_leaf': 5}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.854167\n",
            "Model F1-score: 0.853331\n",
            "Model precision-score: 0.854129\n",
            "Model recall-score: 0.854167\n",
            "Model Parameters: {'random_state': 100, ' criterion': 8, 'max_depth': 5, 'n_estimators': 588, 'min_samples_leaf': 5}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\u001b[32m[I 2023-02-19 13:16:53,994]\u001b[0m Trial 49 finished with value: 0.8625 and parameters: {'random_state': 100, 'min_samples_split': 9, 'max_depth': 7, 'n_estimators': 232, 'min_samples_leaf': 2}. Best is trial 42 with value: 0.8791666666666667.\u001b[0m\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model Accuracy: 0.8625\n",
            "Model F1-score: 0.861722\n",
            "Model precision-score: 0.862517\n",
            "Model recall-score: 0.8625\n",
            "Model Parameters: {'random_state': 100, ' criterion': 9, 'max_depth': 7, 'n_estimators': 232, 'min_samples_leaf': 2}\n",
            "Number of finished trials: 50\n",
            "Best trial:\n",
            "  Value: 0.8791666666666667\n",
            "  Params: \n",
            "    random_state: 100\n",
            "    min_samples_split: 4\n",
            "    max_depth: 7\n",
            "    n_estimators: 445\n",
            "    min_samples_leaf: 5\n"
          ]
        }
      ],
      "source": [
        "import optuna\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "\n",
        "def objective(trial):\n",
        "    \"\"\"Optuna objective: train a random forest with sampled hyperparameters.\n",
        "\n",
        "    Hyperparameters are drawn from `trial`, a RandomForestClassifier is fitted\n",
        "    on an 80/20 split of the notebook-level `X_mean` / `y`, and the hold-out\n",
        "    accuracy is returned as the value the study optimises.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    trial : optuna.trial.Trial\n",
        "        Trial object used to sample the hyperparameters.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    float\n",
        "        Accuracy on the hold-out split. Weighted F1, precision and recall are\n",
        "        printed and stored on the trial as user attributes.\n",
        "    \"\"\"\n",
        "    param = {\n",
        "        'random_state': trial.suggest_categorical('random_state', [0, 25, 100, None]),\n",
        "        # Keys must match RandomForestClassifier argument names exactly.\n",
        "        # scikit-learn rejects an integer min_samples_split below 2, so start at 2.\n",
        "        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),\n",
        "        'max_depth': trial.suggest_int('max_depth', 2, 32, log=True),\n",
        "        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),\n",
        "        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),\n",
        "    }\n",
        "\n",
        "    # The suggested seed is deliberately reused for train_test_split().\n",
        "    # NOTE(review): trials with different seeds are therefore scored on different\n",
        "    # hold-out sets, so their accuracies are not strictly comparable - confirm intent.\n",
        "    suggested_random_state = param['random_state']\n",
        "\n",
        "    X_train, X_test, y_train, y_test = train_test_split(\n",
        "        X_mean, y, test_size=0.2, shuffle=True, random_state=suggested_random_state\n",
        "    )\n",
        "\n",
        "    # Pass the sampled hyperparameters to the model; without this every trial\n",
        "    # would train the same default forest.\n",
        "    clf = RandomForestClassifier(**param)\n",
        "    clf.fit(X_train, y_train)\n",
        "    y_pred = clf.predict(X_test)\n",
        "\n",
        "    # scikit-learn metrics take (y_true, y_pred); the order matters for the\n",
        "    # weighted precision / recall / F1 scores.\n",
        "    acc = accuracy_score(y_test, y_pred)\n",
        "    f1 = f1_score(y_test, y_pred, average='weighted')\n",
        "    precision = precision_score(y_test, y_pred, average='weighted')\n",
        "    recall = recall_score(y_test, y_pred, average='weighted')\n",
        "\n",
        "    print(f\"Model Accuracy: {round(acc, 6)}\")\n",
        "    print(f\"Model F1-score: {round(f1, 6)}\")\n",
        "    print(f\"Model precision-score: {round(precision, 6)}\")\n",
        "    print(f\"Model recall-score: {round(recall, 6)}\")\n",
        "\n",
        "    print(f\"Model Parameters: {param}\")\n",
        "\n",
        "    # A single-objective study uses one return value, so keep the secondary\n",
        "    # metrics on the trial instead of in unreachable return statements.\n",
        "    trial.set_user_attr('f1', float(f1))\n",
        "    trial.set_user_attr('precision', float(precision))\n",
        "    trial.set_user_attr('recall', float(recall))\n",
        "\n",
        "    return acc  # objective value maximised by the study\n",
        "\n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    # Run a 50-trial study that maximises the value returned by objective(),\n",
        "    # sampling with Optuna's TPE sampler.\n",
        "    tpe_sampler = optuna.samplers.TPESampler()\n",
        "    study = optuna.create_study(direction=\"maximize\", sampler=tpe_sampler)\n",
        "    study.optimize(objective, n_trials=50)\n",
        "\n",
        "    finished_count = len(study.trials)\n",
        "    print(\"Number of finished trials: {}\".format(finished_count))\n",
        "\n",
        "    # Summarise the best trial: its objective value, then each sampled parameter.\n",
        "    print(\"Best trial:\")\n",
        "    trial = study.best_trial\n",
        "    best_value = trial.value\n",
        "    print(\"  Value: {}\".format(best_value))\n",
        "\n",
        "    print(\"  Params: \")\n",
        "    best_params = trial.params\n",
        "    for key, value in best_params.items():\n",
        "        print(\"    {}: {}\".format(key, value))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 153,
      "metadata": {
        "id": "BO_5G4hE6Jv_",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "90b9d6f4-7d7a-4a8b-c64d-133824ef9da5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "- ROC_AUC of RFC: 0.9578\n",
            "10-fold CV mean of RFC: 0.9098\n",
            "10-fold CV std of RFC: 0.0435\n"
          ]
        }
      ],
      "source": [
        "# Preprocessing: mean-impute missing values, then robust-scale the features.\n",
        "mean_pipeline1 = Pipeline(steps=[\n",
        "    ('Imputer', SimpleImputer(strategy='mean')),\n",
        "    ('RobustScaler', RobustScaler())\n",
        "])\n",
        "\n",
        "# NOTE(review): the pipeline is fitted on all of X_full before the split and before\n",
        "# cross-validation, so the imputation / scaling statistics also see the evaluation\n",
        "# rows - confirm this is acceptable for the reported scores.\n",
        "X_mean1= mean_pipeline1.fit_transform(X_full)\n",
        "X_train_mean, X_test_mean, y_train_mean, y_test_mean = train_test_split(X_mean1, y_full, test_size=0.2, random_state=42)\n",
        "\n",
        "# Hold-out evaluation: fit on the 80% split and score the remaining 20%.\n",
        "model_rf_mean = RandomForestClassifier(random_state= 100,\n",
        "    min_samples_split= 9,\n",
        "    max_depth= 17,\n",
        "    n_estimators= 959,\n",
        "    min_samples_leaf= 1 ).fit(X_train_mean, y_train_mean)\n",
        "\n",
        "# The full predict_proba output (every column) is passed to the one-vs-rest ROC AUC.\n",
        "probs_rf_mean = model_rf_mean.predict_proba(X_test_mean)\n",
        "auc_rf1mean = roc_auc_score(y_test_mean, probs_rf_mean, multi_class='ovr')\n",
        "\n",
        "print('- ROC_AUC of RFC: {:.4f}'.format(auc_rf1mean))\n",
        "\n",
        "\n",
        "### meantuned CV score\n",
        "# NOTE(review): these hyperparameters differ from the hold-out model above -\n",
        "# confirm which set is the tuned one.\n",
        "RFC_mean = RandomForestClassifier( random_state= 100,\n",
        "    min_samples_split= 9,\n",
        "    max_depth= 9,\n",
        "    n_estimators= 708,\n",
        "    min_samples_leaf= 6  )\n",
        "\n",
        "# Run the 10-fold cross-validation once and derive both statistics from the same\n",
        "# fold scores, instead of repeating the whole CV for the mean and for the std.\n",
        "cv_fold_scores_rf = cross_val_score(RFC_mean, X_mean1, y_full, cv=10, scoring='roc_auc_ovr')\n",
        "scores_cv_rf= cv_fold_scores_rf.mean()\n",
        "scores_cv_rf_std= cv_fold_scores_rf.std()\n",
        "print('10-fold CV mean of RFC: {:.4f}'.format(scores_cv_rf))\n",
        "print('10-fold CV std of RFC: {:.4f}'.format(scores_cv_rf_std))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 154,
      "metadata": {
        "id": "PhIN089uohRs"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": 154,
      "metadata": {
        "id": "PF7-TqJBohVK"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.12"
    },
    "vscode": {
      "interpreter": {
        "hash": "b7082a90f0341f66b325168da8fc238f0b2aba7ee16848d917086bb4ed45c134"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}