In [1]:
import geopandas as gpd

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import pickle

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd

In [2]:
def preprocess_inputs(name):
    """Build the geo table used by the map view for one processed dataset.

    Reads the department geometries from the remote GeoJSON file and the
    processed variables from ``datasets/<name>``, fits a random-forest
    regressor and a decision-tree classifier on a 70% training split, stores
    their predictions for every row of the dataset, and left-joins the
    result onto the geometries by department name.

    Parameters
    ----------
    name : str
        File name of the processed-variables CSV inside ``datasets/``.

    Returns
    -------
    The merged geo table with the rate columns scaled by 100, the numeric
    columns coerced with ``pd.to_numeric`` and duplicate rows removed.
    """
    feature_cols = ['support_rate_rodolfo', 'tasa_aumento_pib', 'tasa_aumento_desempleo']
    target_col = 'support_rate_rodolfo_real'

    data_geo = gpd.read_file('https://raw.githubusercontent.com/CMinge77/DBjson/main/uBurt.json')
    df_variables = pd.read_csv('datasets/{}'.format(name), index_col=[0])

    X = df_variables[feature_cols]
    y = df_variables[target_col]

    # Regression model on the continuous target; only the training part of the split is used.
    reg_X_train, _, reg_y_train, _ = train_test_split(X, y, test_size=0.3, random_state=5682)
    RF_reg = RandomForestRegressor(max_depth=2, random_state=0)
    RF_reg.fit(reg_X_train, reg_y_train)

    # Classification model on the target rounded to integer labels, same split seed.
    clf_X_train, _, clf_y_train, _ = train_test_split(
        X, np.round(y).astype(int), test_size=0.3, random_state=5682
    )
    DT_class = DecisionTreeClassifier(random_state=0)
    DT_class.fit(clf_X_train, clf_y_train)

    # Predictions are stored for every row of the dataset, not only the held-out part.
    df_variables['prediccion_regresion'] = RF_reg.predict(df_variables[feature_cols])
    df_variables['prediccion_clasificacion'] = DT_class.predict(df_variables[feature_cols])
    # Rows located in 'Bogotá' are joined under the 'Cundinamarca' geometry name.
    df_variables['ubicacion'] = df_variables['ubicacion'].str.replace('Bogotá', 'Cundinamarca')

    merged = data_geo.merge(df_variables, left_on='NAME_1', right_on='ubicacion', how='left')
    data_geo = merged.drop('ubicacion', axis=1)

    # Express the rate columns as percentages.
    for column in feature_cols + [target_col]:
        data_geo[column] = data_geo[column] * 100
    # Rounded real support rate, computed back on the original (pre-scaling) range.
    data_geo['support_rate_rodolfo_real_b'] = np.round(data_geo[target_col] / 100)

    numeric_cols = feature_cols + [
        target_col,
        'prediccion_clasificacion',
        'prediccion_regresion',
        'VARNAME_1',
    ]
    for column in numeric_cols:
        data_geo[column] = pd.to_numeric(data_geo[column], errors='coerce')

    return data_geo.drop_duplicates()

In [3]:
# Sentiment-model display name -> processed dataset file inside datasets/.
_SENTIMENT_DATASETS = {
    'Robertuito': 'variables_procesadas_robertuito-sentiment-analysis.csv',
    'Beto': 'variables_procesadas_bert_BetoSentimentAnalysis.csv',
    'Codeswitch': 'variables_procesadas_codeswitch-spaeng-sentiment-analysis-lince.csv',
}
# Unknown sentiment names fall back to the Robertuito dataset.
_DEFAULT_DATASET = _SENTIMENT_DATASETS['Robertuito']

_FEATURE_COLUMNS = ['support_rate_rodolfo', 'tasa_aumento_pib', 'tasa_aumento_desempleo']
_TARGET_COLUMN = 'support_rate_rodolfo_real'

# Display name -> factory for every scikit-learn estimator offered by the GUI.
# Factories are lambdas so the estimator classes are only looked up when used.
_SKLEARN_FACTORIES = {
    'Gaussian Naive Bayes Classifier': lambda: GaussianNB(),
    'Random Forest Classifier': lambda: RandomForestClassifier(max_depth=2, random_state=0),
    'Gradient Boosting Classifier': lambda: GradientBoostingClassifier(
        n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0),
    'Decision Tree Classifier': lambda: DecisionTreeClassifier(random_state=0),
    'K-Nearest-Neighbors Classifier': lambda: KNeighborsClassifier(n_neighbors=3),
    'Logistic Regression Classifier': lambda: LogisticRegression(random_state=0),
    'Support Vector Classifier': lambda: SVC(gamma='auto'),
    'Linear Regression': lambda: LinearRegression(),
    'Support Vector Regression': lambda: SVR(C=1.0, epsilon=0.2),
    'Random Forest Regression': lambda: RandomForestRegressor(max_depth=2, random_state=0),
    'Ridge Regression': lambda: Ridge(alpha=0.1),
    'Lasso Regression': lambda: Lasso(alpha=0.1),
    'Gradient Boosting Trees Regression': lambda: GradientBoostingRegressor(
        n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0),
    'k-Nearest-Neighbors Regression': lambda: KNeighborsRegressor(n_neighbors=3),
}
_MLP_MODELS = ('MLP Classifier', 'MLP Regression')


def _as_text(value):
    """Return ``value.get()`` for Tk-style variables, or ``value`` itself otherwise."""
    getter = getattr(value, 'get', None)
    return getter() if callable(getter) else value


def _fit_mlp(X_train, y_train, classifier):
    """Fit the small Keras MLP and return a function mapping features to 1-D predictions."""
    # Keras is imported lazily: the notebook has no top-level import for it and
    # only the two MLP models need it.
    try:
        from tensorflow import keras
    except ImportError:
        import keras

    net = keras.models.Sequential()
    net.add(keras.layers.Dense(4, activation="relu"))
    net.add(keras.layers.Dense(3, activation="relu"))
    net.add(keras.layers.Dense(2, activation="relu"))
    if classifier:
        # A single softmax unit always outputs 1.0; sigmoid is the output that
        # pairs with binary cross-entropy.
        net.add(keras.layers.Dense(1, activation="sigmoid"))
        net.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    else:
        net.add(keras.layers.Dense(1))
        net.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])
    net.fit(X_train, y_train, epochs=30, verbose=False)

    def predict(features):
        # Keras returns shape (n, 1); flatten so the result fits a DataFrame
        # column and the scikit-learn metrics.
        raw = np.asarray(net.predict(features, verbose=False)).ravel()
        # Probabilities become 0/1 labels so accuracy_score can compare them.
        return (raw >= 0.5).astype(int) if classifier else raw

    return predict


def load_model(sentiment_model_name, model_name, dept):
    """Train the selected model and report its metric and one department's values.

    Parameters
    ----------
    sentiment_model_name, model_name, dept
        Either plain strings or objects exposing ``.get()`` (for example
        ``tkinter.StringVar``). ``sentiment_model_name`` selects the dataset
        (unknown names fall back to Robertuito), ``model_name`` selects the
        estimator and must end in ``'Classifier'`` or ``'Regression'``, and
        ``dept`` is matched against the ``ubicacion`` column.

    Returns
    -------
    tuple
        ``(metric, dept_pred, support_real, tasa_pib, tasa_desempleo)`` where
        ``metric`` is ``"Accuracy: ..."`` or ``"RMSE: ..."`` computed on the
        30% held-out split and the remaining values are rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``model_name`` is not a supported model or ``dept`` has no row in
        the dataset.
    """
    sentiment_model_name = _as_text(sentiment_model_name)
    model_name = _as_text(model_name)
    dept = _as_text(dept)

    is_classifier = model_name.endswith('Classifier')
    if not is_classifier and not model_name.endswith('Regression'):
        raise ValueError("Unsupported prediction model: {!r}".format(model_name))

    name = _SENTIMENT_DATASETS.get(sentiment_model_name, _DEFAULT_DATASET)
    df_variables = pd.read_csv('datasets/{}'.format(name), index_col=[0])
    X = df_variables[_FEATURE_COLUMNS]
    y = df_variables[_TARGET_COLUMN]

    # Classifiers learn the target rounded to integer labels; regressors the raw rate.
    target = np.round(y).astype(int) if is_classifier else y
    X_train, X_test, y_train, y_test = train_test_split(
        X, target, test_size=0.3, random_state=5682
    )

    if model_name in _SKLEARN_FACTORIES:
        model = _SKLEARN_FACTORIES[model_name]()
        model.fit(X_train, y_train)
        predict = model.predict
    elif model_name in _MLP_MODELS:
        predict = _fit_mlp(X_train, y_train, is_classifier)
    else:
        raise ValueError("Unsupported prediction model: {!r}".format(model_name))

    y_pred = predict(X_test)

    # Model evaluation metric on the held-out split.
    if is_classifier:
        metric = "Accuracy: {}".format(np.round(accuracy_score(y_test, y_pred), decimals=4))
    else:
        # RMSE via sqrt(MSE): the `squared=False` keyword was deprecated in
        # scikit-learn 1.4 and removed in 1.6.
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        metric = 'RMSE: {}'.format(np.round(rmse, decimals=4))

    df_variables['prediccion'] = predict(X)

    dept_rows = df_variables[df_variables['ubicacion'] == dept]
    if dept_rows.empty:
        raise ValueError("No data available for department {!r} in {}".format(dept, name))

    dept_pred = dept_rows['prediccion'].values[0]
    # NOTE(review): this reads 'support_rate_rodolfo' (a model input), not
    # 'support_rate_rodolfo_real', although the GUI labels it "Real" - confirm intent.
    support_real = dept_rows['support_rate_rodolfo'].values[0]
    tasa_pib = dept_rows['tasa_aumento_pib'].values[0]
    tasa_desempleo = dept_rows['tasa_aumento_desempleo'].values[0]

    return (
        metric,
        np.round(dept_pred, decimals=4),
        np.round(support_real, decimals=4),
        np.round(tasa_pib, decimals=4),
        np.round(tasa_desempleo, decimals=4),
    )

In [5]:
from tkinter import *
import tkinter as tk
import tkintermapview

class Apliccation():
    """Tkinter GUI that shows per-department election predictions on a map.

    The window has three drop-downs (sentiment model, prediction model and
    department), a block of result labels filled from ``load_model`` and a
    ``tkintermapview`` map on which the selected department is outlined.
    """

    # Sentiment-model display name -> file name shared by datasets/ and resultados/.
    SENTIMENT_FILES = {
        'Robertuito': 'variables_procesadas_robertuito-sentiment-analysis.csv',
        'Beto': 'variables_procesadas_bert_BetoSentimentAnalysis.csv',
        'Codeswitch': 'variables_procesadas_codeswitch-spaeng-sentiment-analysis-lince.csv',
    }
    # Unknown selections fall back to this entry, like load_model does.
    DEFAULT_SENTIMENT = 'Robertuito'
    # (lat, lon) the map is centred on before a department is drawn.
    MAP_CENTER = (4.570868, -74.297333)

    def __init__(self):
        """Create the main window, the map widget and the default selections."""
        self.GUI = Tk()
        self.width= self.GUI.winfo_screenwidth()
        self.height= self.GUI.winfo_screenheight()
        self.GUI.geometry("%dx%d" % (self.width, self.height))
        self.GUI.title('Predicción de elecciones presidenciales Colombia 2022')
        self.polygon_1 = None
        self.map_widget = tkintermapview.TkinterMapView(self.GUI, width=self.width*0.95, height=self.height*0.5, corner_radius=0)
        self.map_widget.set_position(*self.MAP_CENTER)
        self.map_widget.set_zoom(6)
        self.map_widget.place(relx=0.5, rely=0.6, anchor=tk.CENTER)
        self.data_geo = preprocess_inputs('variables_procesadas_robertuito-sentiment-analysis.csv')
        self.departamentos = list(self.data_geo['NAME_1'].drop_duplicates())
        self.variable_sent = StringVar(self.GUI)
        self.variable_dept = StringVar(self.GUI)
        self.variable_model = StringVar(self.GUI)
        self.variable_dept.set("Boyacá")
        self.variable_sent.set("Robertuito")
        self.variable_model.set('MLP Regression')
        self.models = ['']
        self.metric = ''
        self.dept_pred = 0
        self.support_real = 0
        self.tasa_pib = 0
        self.tasa_desempleo = 0
        # Value labels of the results block; built on the first update_labels() call.
        self._value_labels = None

    @staticmethod
    def _as_percent(value):
        """Format a fraction as a percentage string rounded to 4 decimals."""
        return "{}%".format(np.round(value*100, decimals=4))

    @staticmethod
    def _outline_latlon(geo):
        """Return the (lat, lon) outline to draw for a GeoJSON-like geometry mapping.

        Only exterior rings are used: the single exterior ring of a Polygon,
        or the longest exterior ring among the parts of a MultiPolygon.
        Any other geometry type yields an empty list.
        """
        if geo['type'] == 'Polygon':
            # coordinates[0] is the exterior ring; later rings are holes and
            # must not be concatenated into the drawn path.
            rings = [geo['coordinates'][0]]
        elif geo['type'] == 'MultiPolygon':
            rings = [part[0] for part in geo['coordinates']]
        else:
            return []
        # GeoJSON stores (lon, lat); the map widget expects (lat, lon).
        outlines = [[(point[1], point[0]) for point in ring] for ring in rings]
        return max(outlines, key=len)

    def _dataset_file(self):
        """Return the dataset file name for the currently selected sentiment model."""
        return self.SENTIMENT_FILES.get(
            self.variable_sent.get(), self.SENTIMENT_FILES[self.DEFAULT_SENTIMENT]
        )

    def _build_result_labels(self):
        """Create the caption/value label pairs of the results block (grid rows 3-7)."""
        captions = [
            ('support', "Support Rate Real: "),
            ('prediction', "Support Rate Predicho: "),
            ('pib', "Tasa Incremento PIB: "),
            ('desempleo', "Tasa Incremento Desempleo: "),
            ('metric', "Métricas: "),
        ]
        self._value_labels = {}
        for offset, (key, caption) in enumerate(captions):
            row = 3 + offset
            caption_label = Label(self.GUI, text=caption)
            caption_label.grid(row=row, column=0, sticky='ew')
            caption_label.config(fg="black", bg="lightgray", font=("Verdana",21))

            value_label = Label(self.GUI, text="")
            value_label.grid(row=row, column=1, sticky='ew')
            value_label.config(fg="black", bg="lightgray", font=("Verdana",21))
            self._value_labels[key] = value_label

    def update_labels(self):
        """Retrain the selected model and refresh the results block."""
        metric, dept_pred, support_real, tasa_pib, tasa_desempleo = load_model(self.variable_sent, self.variable_model, self.variable_dept)
        self.metric = metric
        self.dept_pred = dept_pred
        self.support_real = support_real
        self.tasa_pib = tasa_pib
        self.tasa_desempleo = tasa_desempleo

        # Labels are created once and updated in place; creating new widgets on
        # every refresh would pile them up in the same grid cells.
        if self._value_labels is None:
            self._build_result_labels()

        self._value_labels['support'].config(text=self._as_percent(self.support_real))
        self._value_labels['prediction'].config(text=self._as_percent(self.dept_pred))
        self._value_labels['pib'].config(text=self._as_percent(self.tasa_pib))
        self._value_labels['desempleo'].config(text=self._as_percent(self.tasa_desempleo))
        self._value_labels['metric'].config(text=self.metric)

    def change_department(self, choice):
        """Outline the selected department on the map and refresh the results.

        ``choice`` is the value passed by the OptionMenu callback; the
        selection is read from ``self.variable_dept``.
        """
        if self.polygon_1 is not None:
            self.polygon_1.delete()
            # Forget the handle so it is never deleted twice.
            self.polygon_1 = None

        self.map_widget.set_position(*self.MAP_CENTER)
        choice = self.variable_dept.get()
        values = self.data_geo[self.data_geo['NAME_1']==choice]['geometry'].values[0].__geo_interface__

        outline = self._outline_latlon(values)
        if outline:
            # Centre the map on the mean of the outline vertices.
            lat = np.mean([point[0] for point in outline])
            lon = np.mean([point[1] for point in outline])
            self.map_widget.set_position(lat, lon)
            self.map_widget.set_zoom(7)
            self.polygon_1 = self.map_widget.set_polygon(outline, fill_color="blue", outline_color="blue", border_width=5)

        self.update_labels()

    def change_sentiment_model(self, choice):
        """Reload the geo table for the selected sentiment model and refresh the results.

        ``choice`` is the value passed by the OptionMenu callback; the
        selection is read from ``self.variable_sent``.
        """
        self.data_geo = preprocess_inputs(self._dataset_file())
        self.departamentos = list(self.data_geo['NAME_1'].drop_duplicates())
        self.update_labels()

    def change_prediction_model(self):
        """Return the model names listed in the stored result tables.

        Reads the regression and classification result CSVs under
        ``resultados/`` for the selected sentiment model, refreshes the
        results block, and returns the regression names followed by the
        classification names.
        """
        stem = self._dataset_file()
        reg_models = pd.read_csv('resultados/regresion_{}'.format(stem), index_col=[0])
        class_models = pd.read_csv('resultados/clasificacion_{}'.format(stem), index_col=[0])

        both_models = pd.concat([reg_models[['Model']], class_models[['Model']]])['Model'].tolist()

        self.update_labels()

        return both_models

    def _on_model_selected(self, choice):
        """OptionMenu callback: refresh the results for the newly selected prediction model."""
        self.update_labels()

    def main(self):
        """Build the three drop-down rows and run the Tk main loop."""
        self.variable_sent.set("Robertuito")
        sentiments = ['Robertuito', 'Beto', 'Codeswitch']

        # Row 0: sentiment-analysis model selector.
        dropdown_sent = OptionMenu(
            self.GUI,
            self.variable_sent,
            *sentiments,
            command=self.change_sentiment_model
        )

        label_m = Label(self.GUI, text="Modelo Análisis de Sentimientos: ")
        label_m.grid(row=0, column=0, sticky='ew')
        label_m.config(fg="black", bg="lightgray", font=("Verdana",18))
        self.GUI.rowconfigure(0, {'minsize': 50})

        dropdown_sent.grid(row=0, column=1, sticky='ew')
        dropdown_sent.config(fg="black", bg="lightgray", font=("Verdana",18))

        # Row 1: prediction model selector.
        label_p = Label(self.GUI, text="Modelo de Predicción: ")
        label_p.grid(row=1, column=0, sticky='ew')
        label_p.config(fg="black", bg="lightgray", font=("Verdana",18))

        self.variable_model.set("Random Forest Classifier")

        # Also renders the results block for the default selections.
        self.models = self.change_prediction_model()

        dropdown_model = OptionMenu(
            self.GUI,
            self.variable_model,
            *self.models,
            # Without a command, picking a model would not refresh the results.
            command=self._on_model_selected
        )
        dropdown_model.grid(row=1, column=1, sticky='ew')
        dropdown_model.config(fg="black", bg="lightgray", font=("Verdana",18))

        # Row 2: department selector.
        label_d = Label(self.GUI, text="Departamento: ")
        label_d.grid(row=2, column=0, sticky='ew')
        label_d.config(fg="black", bg="lightgray", font=("Verdana",18))

        self.variable_dept.set("Boyacá") # default value

        departamentos = self.departamentos

        dropdown_dept = OptionMenu(
            self.GUI,
            self.variable_dept,
            *departamentos,
            command=self.change_department
        )

        dropdown_dept.grid(row=2, column=1, sticky='ew')
        dropdown_dept.config(fg="black", bg="lightgray", font=("Verdana",18))

        self.GUI.mainloop()

# Build the GUI (the constructor creates the Tk window and loads the default
# Robertuito dataset through preprocess_inputs), then build the widgets and
# enter the Tk event loop via main(), which ends in GUI.mainloop().
app = Apliccation()
app.main()

Exception ignored in: <function PhotoImage.__del__ at 0x00000202303E2E60>
Traceback (most recent call last):
  File "d:\Anaconda_39\envs\DL\lib\site-packages\PIL\ImageTk.py", line 118, in __del__
    name = self.__photo.name
AttributeError: 'PhotoImage' object has no attribute '_PhotoImage__photo'
Exception ignored in: <function PhotoImage.__del__ at 0x00000202303E2E60>
Traceback (most recent call last):
  File "d:\Anaconda_39\envs\DL\lib\site-packages\PIL\ImageTk.py", line 118, in __del__
Exception ignored in: <function PhotoImage.__del__ at 0x00000202303E2E60>
Traceback (most recent call last):
  File "d:\Anaconda_39\envs\DL\lib\site-packages\PIL\ImageTk.py", line 118, in __del__
    name = self.__photo.name
AttributeError: 'PhotoImage' object has no attribute '_PhotoImage__photo'
    name = self.__photo.name
AttributeError: 'PhotoImage' object has no attribute '_PhotoImage__photo'
Exception ignored in: <function PhotoImage.__del__ at 0x00000202303E2E60>
Traceback (most recent call l