{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creación de los Modelos\n",
    "\n",
    "Trains several regression and classification models that predict `support_rate_rodolfo_real` from the columns `support_rate_rodolfo`, `tasa_aumento_pib` and `tasa_aumento_desempleo`.\n",
    "\n",
    "- Regressors are ranked by RMSE on the test split.\n",
    "- Classifiers are trained on the target rounded to the nearest integer and ranked by macro-averaged F-score, precision and recall.\n",
    "- Both rankings are written as CSV files under `resultados/`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from sklearn.linear_model import Lasso, Ridge\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import precision_recall_fscore_support"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives in this cell.\n",
    "name = 'variables_procesadas_bert_BetoSentimentAnalysis.csv'\n",
    "DATA_DIR = Path('datasets')\n",
    "RESULTS_DIR = Path('resultados')\n",
    "\n",
    "FEATURES = ['support_rate_rodolfo', 'tasa_aumento_pib', 'tasa_aumento_desempleo']\n",
    "TARGET = 'support_rate_rodolfo_real'\n",
    "\n",
    "TEST_SIZE = 0.3\n",
    "SPLIT_SEED = 5682  # random_state of the train/test split\n",
    "MODEL_SEED = 0     # random_state shared by the stochastic estimators\n",
    "MLP_EPOCHS = 30    # training epochs of both Keras networks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rmse(y_true, y_pred):\n",
    "    \"\"\"Return the root mean squared error between targets and predictions.\n",
    "\n",
    "    Both inputs are flattened to 1-D first, so a column vector of\n",
    "    predictions (for example the (n, 1) array a Keras model with a single\n",
    "    output unit returns) is compared element-wise with the targets instead\n",
    "    of being broadcast against them.\n",
    "    \"\"\"\n",
    "    y_true = np.ravel(y_true)\n",
    "    y_pred = np.ravel(y_pred)\n",
    "    # The square root is taken explicitly because the `squared=False`\n",
    "    # shortcut of mean_squared_error is deprecated in recent scikit-learn.\n",
    "    return float(np.sqrt(mean_squared_error(y_true, y_pred)))\n",
    "\n",
    "\n",
    "def fit_and_score_regressor(model, X_train, y_train, X_test, y_test):\n",
    "    \"\"\"Fit `model` on the training split and return its RMSE on the test split.\"\"\"\n",
    "    model.fit(X_train, y_train)\n",
    "    return rmse(y_test, model.predict(X_test))\n",
    "\n",
    "\n",
    "def macro_scores(y_true, y_pred):\n",
    "    \"\"\"Return macro-averaged (precision, recall, F-score) of the predicted labels.\n",
    "\n",
    "    `zero_division=0` yields the same 0.0 that scikit-learn assigns by\n",
    "    default to classes that are never predicted, but without emitting an\n",
    "    UndefinedMetricWarning for each of them.\n",
    "    \"\"\"\n",
    "    prec, rec, fscore, _ = precision_recall_fscore_support(\n",
    "        y_true, y_pred, average='macro', zero_division=0\n",
    "    )\n",
    "    return prec, rec, fscore\n",
    "\n",
    "\n",
    "def fit_and_score_classifier(model, X_train, y_train, X_test, y_test):\n",
    "    \"\"\"Fit `model` on the training split and return its macro scores on the test split.\"\"\"\n",
    "    model.fit(X_train, y_train)\n",
    "    return macro_scores(y_test, model.predict(X_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_variables = pd.read_csv(DATA_DIR / name, index_col=[0])\n",
    "X, y = df_variables[FEATURES], df_variables[TARGET]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED\n",
    ")\n",
    "assert len(X_train) + len(X_test) == len(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Modelos de Regresión"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model name -> test RMSE. A dict keeps the cells below idempotent:\n",
    "# re-running one of them overwrites its entry instead of adding a duplicate row.\n",
    "resultados_reg = {}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### K-Nearest Neighbors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "KNN_reg = KNeighborsRegressor(n_neighbors=3)\n",
    "rmse_KNN_reg = fit_and_score_regressor(KNN_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['k-Nearest-Neighbors Regression'] = rmse_KNN_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient Boosting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "GBT_reg = GradientBoostingRegressor(\n",
    "    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=MODEL_SEED\n",
    ")\n",
    "rmse_GBT_reg = fit_and_score_regressor(GBT_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Gradient Boosting Trees Regression'] = rmse_GBT_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Lasso"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RL_reg = Lasso(alpha=0.1)\n",
    "rmse_RL_reg = fit_and_score_regressor(RL_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Lasso Regression'] = rmse_RL_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Ridge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RR_reg = Ridge(alpha=0.1)\n",
    "rmse_RR_reg = fit_and_score_regressor(RR_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Ridge Regression'] = rmse_RR_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regresión Lineal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "LR_reg = LinearRegression()\n",
    "rmse_LR_reg = fit_and_score_regressor(LR_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Linear Regression'] = rmse_LR_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Support Vector Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SVR_reg = SVR(C=1.0, epsilon=0.2)\n",
    "rmse_SVR_reg = fit_and_score_regressor(SVR_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Support Vector Regression'] = rmse_SVR_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RF_reg = RandomForestRegressor(max_depth=2, random_state=MODEL_SEED)\n",
    "rmse_RF_reg = fit_and_score_regressor(RF_reg, X_train, y_train, X_test, y_test)\n",
    "resultados_reg['Random Forest Regression'] = rmse_RF_reg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MLP Regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed Python, NumPy and TensorFlow so the run is as repeatable as the backend allows.\n",
    "keras.utils.set_random_seed(MODEL_SEED)\n",
    "\n",
    "MLP_reg = keras.models.Sequential([\n",
    "    keras.layers.Dense(4, activation='relu'),\n",
    "    keras.layers.Dense(3, activation='relu'),\n",
    "    keras.layers.Dense(2, activation='relu'),\n",
    "    keras.layers.Dense(1),\n",
    "])\n",
    "MLP_reg.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])\n",
    "MLP_reg.fit(X_train, y_train, epochs=MLP_EPOCHS, verbose=0)\n",
    "\n",
    "# predict() returns shape (n, 1) while y_test is 1-D. Subtracting them directly\n",
    "# would broadcast to an (n, n) matrix and give a wrong RMSE; rmse() flattens both.\n",
    "y_pred_MLP_reg = MLP_reg.predict(X_test, verbose=0)\n",
    "rmse_MLP_reg = rmse(y_test, y_pred_MLP_reg)\n",
    "resultados_reg['MLP Regression'] = rmse_MLP_reg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_resultados_reg = pd.DataFrame(list(resultados_reg.items()), columns=['Model', 'RMSE'])\n",
    "df_resultados_reg_sorted = df_resultados_reg.sort_values(by=['RMSE'], ascending=True)\n",
    "\n",
    "RESULTS_DIR.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders\n",
    "df_resultados_reg_sorted.to_csv(RESULTS_DIR / 'regresion_{}'.format(name))\n",
    "df_resultados_reg_sorted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Modelos de Clasificación"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model name -> (precision, recall, F-score), all macro-averaged on the test split.\n",
    "resultados_class = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class labels are the regression target rounded to the nearest integer.\n",
    "y_class = np.round(y).astype(int)\n",
    "\n",
    "# Separate names so the regression split above is not overwritten and the\n",
    "# regression cells can still be re-run after this one.\n",
    "X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(\n",
    "    X, y_class, test_size=TEST_SIZE, random_state=SPLIT_SEED\n",
    ")\n",
    "assert len(X_train_clf) + len(X_test_clf) == len(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient Boosting Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "GB_class = GradientBoostingClassifier(\n",
    "    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=MODEL_SEED\n",
    ")\n",
    "resultados_class['Gradient Boosting Classifier'] = fit_and_score_classifier(\n",
    "    GB_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Decision Tree Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DT_class = DecisionTreeClassifier(random_state=MODEL_SEED)\n",
    "resultados_class['Decision Tree Classifier'] = fit_and_score_classifier(\n",
    "    DT_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "LR_class = LogisticRegression(random_state=MODEL_SEED)\n",
    "resultados_class['Logistic Regression Classifier'] = fit_and_score_classifier(\n",
    "    LR_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Support Vector Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SV_class = SVC(gamma='auto')\n",
    "resultados_class['Support Vector Classifier'] = fit_and_score_classifier(\n",
    "    SV_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### K-Nearest-Neighbors Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "KNN_class = KNeighborsClassifier(n_neighbors=3)\n",
    "resultados_class['K-Nearest-Neighbors Classifier'] = fit_and_score_classifier(\n",
    "    KNN_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RF_class = RandomForestClassifier(max_depth=2, random_state=MODEL_SEED)\n",
    "resultados_class['Random Forest Classifier'] = fit_and_score_classifier(\n",
    "    RF_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gaussian Naive Bayes Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "GNB_class = GaussianNB()\n",
    "resultados_class['Gaussian Naive Bayes Classifier'] = fit_and_score_classifier(\n",
    "    GNB_class, X_train_clf, y_train_clf, X_test_clf, y_test_clf\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MLP Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed Python, NumPy and TensorFlow so the run is as repeatable as the backend allows.\n",
    "keras.utils.set_random_seed(MODEL_SEED)\n",
    "\n",
    "# Encode the labels as indices 0..K-1 so the network works for any set of integer classes.\n",
    "classes = np.unique(y_train_clf)\n",
    "y_train_idx = np.searchsorted(classes, y_train_clf)\n",
    "\n",
    "MLP_class = keras.models.Sequential([\n",
    "    keras.layers.Dense(4, activation='relu'),\n",
    "    keras.layers.Dense(3, activation='relu'),\n",
    "    keras.layers.Dense(2, activation='relu'),\n",
    "    # One softmax unit per class: a single softmax unit always outputs 1.0,\n",
    "    # so it could neither learn nor ever predict a second class.\n",
    "    keras.layers.Dense(len(classes), activation='softmax'),\n",
    "])\n",
    "MLP_class.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "MLP_class.fit(X_train_clf, y_train_idx, epochs=MLP_EPOCHS, verbose=0)\n",
    "\n",
    "# predict() returns per-class probabilities; the metrics need hard labels,\n",
    "# so take the most probable class and map it back to the original label values.\n",
    "proba_MLP_class = MLP_class.predict(X_test_clf, verbose=0)\n",
    "y_pred_MLP_class = classes[np.argmax(proba_MLP_class, axis=1)]\n",
    "resultados_class['MLP Classifier'] = macro_scores(y_test_clf, y_pred_MLP_class)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_resultados_class = pd.DataFrame(\n",
    "    [[model_name, *scores] for model_name, scores in resultados_class.items()],\n",
    "    columns=['Model', 'Precision', 'Recall', 'F-score'],\n",
    ")\n",
    "df_resultados_class_sorted = df_resultados_class.sort_values(\n",
    "    by=['F-score', 'Precision', 'Recall'], ascending=False\n",
    ")\n",
    "\n",
    "RESULTS_DIR.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders\n",
    "df_resultados_class_sorted.to_csv(RESULTS_DIR / 'clasificacion_{}'.format(name))\n",
    "df_resultados_class_sorted"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.4 ('DL')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "7b12e629898a100bac456066adb1052da5bab249d92357a99acd404c7e8e3e0e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}