PLN-G4-PF / Modelos.ipynb
Modelos.ipynb
Raw
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creación de los Modelos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 311,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from sklearn.linear_model import Lasso, Ridge\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import precision_recall_fscore_support"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 312,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the processed variables; `name` is reused below when the result CSVs are written.\n",
    "name = 'variables_procesadas_bert_BetoSentimentAnalysis.csv'\n",
    "df_variables = pd.read_csv('datasets/{}'.format(name), index_col=[0])\n",
    "\n",
    "# Predictor columns and the target column.\n",
    "X = df_variables[['support_rate_rodolfo', 'tasa_aumento_pib', 'tasa_aumento_desempleo']]\n",
    "y = df_variables['support_rate_rodolfo_real']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 313,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=5682\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Modelos de Regresión"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 314,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accumulates one [model name, RMSE] row per regression model.\n",
    "resultados_reg = list()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### K-Nearest Neighbors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 315,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k-nearest-neighbours regressor (k=3), scored by RMSE on the test split.\n",
    "KNN_reg = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)\n",
    "y_pred = KNN_reg.predict(X_test)\n",
    "rmse_KNN_reg = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)\n",
    "resultados_reg += [['k-Nearest-Neighbors Regression', rmse_KNN_reg]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient Boosting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 316,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gradient boosting with 100 depth-1 trees, scored by RMSE on the test split.\n",
    "GBT_reg = GradientBoostingRegressor(\n",
    "    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0\n",
    ")\n",
    "y_pred = GBT_reg.fit(X_train, y_train).predict(X_test)\n",
    "rmse_GBT_reg = mean_squared_error(y_test, y_pred, squared=False)\n",
    "resultados_reg.extend([['Gradient Boosting Trees Regression', rmse_GBT_reg]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Lasso"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 317,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lasso regression (alpha=0.1), scored by RMSE on the test split.\n",
    "RL_reg = Lasso(alpha=0.1)\n",
    "y_pred = RL_reg.fit(X_train, y_train).predict(X_test)\n",
    "rmse_RL_reg = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)\n",
    "resultados_reg += [['Lasso Regression', rmse_RL_reg]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Ridge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ridge regression (alpha=0.1), scored by RMSE on the test split.\n",
    "RR_reg = Ridge(alpha=0.1).fit(X_train, y_train)\n",
    "y_pred = RR_reg.predict(X_test)\n",
    "rmse_RR_reg = mean_squared_error(y_test, y_pred, squared=False)\n",
    "resultados_reg.extend([['Ridge Regression', rmse_RR_reg]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Lineal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ordinary least-squares baseline, scored by RMSE on the test split.\n",
    "LR_reg = LinearRegression()\n",
    "y_pred = LR_reg.fit(X_train, y_train).predict(X_test)\n",
    "rmse_LR_reg = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)\n",
    "resultados_reg += [['Linear Regression', rmse_LR_reg]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Support Vector Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 320,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Support vector regressor (C=1.0, epsilon=0.2), scored by RMSE on the test split.\n",
    "SVR_reg = SVR(C=1.0, epsilon=0.2).fit(X_train, y_train)\n",
    "y_pred = SVR_reg.predict(X_test)\n",
    "rmse_SVR_reg = mean_squared_error(y_test, y_pred, squared=False)\n",
    "resultados_reg.extend([['Support Vector Regression', rmse_SVR_reg]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random forest of depth-2 trees, scored by RMSE on the test split.\n",
    "RF_reg = RandomForestRegressor(max_depth=2, random_state=0)\n",
    "y_pred = RF_reg.fit(X_train, y_train).predict(X_test)\n",
    "rmse_RF_reg = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)\n",
    "resultados_reg += [['Random Forest Regression', rmse_RF_reg]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MLP Regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1/1 [==============================] - 0s 203ms/step\n"
     ]
    }
   ],
   "source": [
    "# Small fully connected network: 4 -> 3 -> 2 ReLU units followed by one linear output unit.\n",
    "# Seed TensorFlow so repeated runs start from the same initial weights.\n",
    "tf.random.set_seed(0)\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Dense(4, activation=\"relu\"))\n",
    "model.add(keras.layers.Dense(3, activation=\"relu\"))\n",
    "model.add(keras.layers.Dense(2, activation=\"relu\"))\n",
    "model.add(keras.layers.Dense(1))\n",
    "\n",
    "model.compile(loss=\"mean_squared_error\", optimizer=\"adam\", metrics=[\"mse\"])\n",
    "history = model.fit(X_train, y_train, epochs=30, verbose=False)\n",
    "# predict() returns an array of shape (n_samples, 1). Flatten it so it lines up element-wise\n",
    "# with y_test: subtracting an (n, 1) array from an (n,) one broadcasts to an (n, n) matrix\n",
    "# and yields a wrong RMSE.\n",
    "y_pred = model.predict(X_test).ravel()\n",
    "\n",
    "# Same RMSE computation as the other regression models, so the rows are comparable.\n",
    "rmse_MLP_reg = mean_squared_error(y_test, y_pred, squared=False)\n",
    "resultados_reg.append(['MLP Regression', rmse_MLP_reg])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the per-model RMSE rows and save them sorted from lowest to highest RMSE.\n",
    "df_resultados_reg = pd.DataFrame(resultados_reg, columns=['Model', 'RMSE'])\n",
    "(\n",
    "    df_resultados_reg\n",
    "    .sort_values(by=['RMSE'], ascending=True)\n",
    "    .to_csv('resultados/regresion_{}'.format(name))\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Modelos de Clasificación"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accumulates one [model name, precision, recall, F-score] row per classifier.\n",
    "resultados_class = list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round the real-valued target to integer class labels, then redo the 70/30 split\n",
    "# with the same seed as the regression section.\n",
    "y_class = np.round(y).astype(int)\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y_class, test_size=0.3, random_state=5682\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient Boosting Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gradient boosting with 100 depth-1 trees; record macro-averaged precision, recall and F-score.\n",
    "GB_class = GradientBoostingClassifier(\n",
    "    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0\n",
    ")\n",
    "y_pred = GB_class.fit(X_train, y_train).predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class += [['Gradient Boosting Classifier', prec, rec, fscore]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Decision Tree Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single decision tree; record macro-averaged precision, recall and F-score.\n",
    "DT_class = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)\n",
    "y_pred = DT_class.predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class.extend([['Decision Tree Classifier', prec, rec, fscore]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Anaconda_39\\envs\\DL\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1327: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n"
     ]
    }
   ],
   "source": [
    "# Logistic regression; record macro-averaged precision, recall and F-score.\n",
    "LR_class = LogisticRegression(random_state=0)\n",
    "y_pred = LR_class.fit(X_train, y_train).predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class += [['Logistic Regression Classifier', prec, rec, fscore]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Support Vector Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 329,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Anaconda_39\\envs\\DL\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1327: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n"
     ]
    }
   ],
   "source": [
    "# Support vector classifier (gamma='auto'); record macro-averaged precision, recall and F-score.\n",
    "SV_class = SVC(gamma='auto').fit(X_train, y_train)\n",
    "y_pred = SV_class.predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class.extend([['Support Vector Classifier', prec, rec, fscore]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### K-Nearest-Neighbors Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {},
   "outputs": [],
   "source": [
    "# k-nearest-neighbours classifier (k=3); record macro-averaged precision, recall and F-score.\n",
    "KNN_class = KNeighborsClassifier(n_neighbors=3)\n",
    "y_pred = KNN_class.fit(X_train, y_train).predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class += [['K-Nearest-Neighbors Classifier', prec, rec, fscore]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random forest of depth-2 trees; record macro-averaged precision, recall and F-score.\n",
    "RF_class = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train, y_train)\n",
    "y_pred = RF_class.predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class.extend([['Random Forest Classifier', prec, rec, fscore]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gaussian Naive Bayes Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gaussian naive Bayes; record macro-averaged precision, recall and F-score.\n",
    "GNB_class = GaussianNB()\n",
    "y_pred = GNB_class.fit(X_train, y_train).predict(X_test)\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class += [['Gaussian Naive Bayes Classifier', prec, rec, fscore]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MLP Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1/1 [==============================] - 0s 179ms/step\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\Anaconda_39\\envs\\DL\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1327: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, msg_start, len(result))\n"
     ]
    }
   ],
   "source": [
    "# Small fully connected binary classifier: 4 -> 3 -> 2 ReLU units and one sigmoid output unit.\n",
    "# Seed TensorFlow so repeated runs start from the same initial weights.\n",
    "tf.random.set_seed(0)\n",
    "model = keras.models.Sequential()\n",
    "model.add(keras.layers.Dense(4, activation=\"relu\"))\n",
    "model.add(keras.layers.Dense(3, activation=\"relu\"))\n",
    "model.add(keras.layers.Dense(2, activation=\"relu\"))\n",
    "# A softmax over a single unit always outputs 1.0, so every row would be predicted as class 1.\n",
    "# A sigmoid yields a class-1 probability, which is what binary cross-entropy expects.\n",
    "model.add(keras.layers.Dense(1, activation='sigmoid'))\n",
    "\n",
    "model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
    "history = model.fit(X_train, y_train, epochs=30, verbose=False)\n",
    "# predict() returns probabilities of shape (n_samples, 1); flatten and threshold at 0.5\n",
    "# so the metric receives hard class labels instead of raw network outputs.\n",
    "# NOTE(review): assumes y_class only contains the labels 0 and 1 - confirm against the data.\n",
    "y_pred = (model.predict(X_test).ravel() > 0.5).astype(int)\n",
    "\n",
    "prec, rec, fscore, supp = precision_recall_fscore_support(y_test, y_pred, average='macro')\n",
    "resultados_class.append(['MLP Classifier', prec, rec, fscore])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the per-classifier scores and save them sorted by F-score, then precision, then recall (descending).\n",
    "df_resultados_class = pd.DataFrame(\n",
    "    resultados_class, columns=['Model', 'Precision', 'Recall', 'F-score']\n",
    ")\n",
    "(\n",
    "    df_resultados_class\n",
    "    .sort_values(by=['F-score', 'Precision', 'Recall'], ascending=False)\n",
    "    .to_csv('resultados/clasificacion_{}'.format(name))\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.4 ('DL')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "7b12e629898a100bac456066adb1052da5bab249d92357a99acd404c7e8e3e0e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}