"""Train and compare suites of scikit-learn-compatible classifiers.

Two entry points:

* ``train_classifiers`` -- general-purpose classifiers for dense/tabular data.
* ``train_NLP``         -- classifiers suited to (typically sparse) text features.

Both fit every model in their suite, print a table of Accuracy / weighted F1 /
weighted Precision on the held-out test set, and return
``(results, trained_models)``.
"""

import warnings

import numpy as np
import pandas as pd  # noqa: F401  -- kept: may be used elsewhere in this module
from lightgbm import LGBMClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import (
    LogisticRegression,
    PassiveAggressiveClassifier,
    RidgeClassifier,
    SGDClassifier,
)
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.model_selection import train_test_split  # noqa: F401  -- kept: may be used elsewhere
from sklearn.naive_bayes import BernoulliNB, ComplementNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Silence convergence / deprecation chatter from the many estimators trained below.
warnings.filterwarnings("ignore")


def _fit_and_report(models, X_train, X_test, y_train, y_test, method_name,
                    name_width, rule_width):
    """Fit each model, print a live metrics table, and collect results.

    Shared engine for :func:`train_classifiers` and :func:`train_NLP`.

    Parameters
    ----------
    models : dict[str, estimator]
        Display name -> unfitted estimator exposing ``fit``/``predict``.
    X_train, X_test, y_train, y_test :
        Feature matrices and label vectors (any array-like sklearn accepts).
    method_name : str
        Label for the feature-extraction method, printed in the banner.
    name_width : int
        Column width for the model-name field.
    rule_width : int
        Length of the horizontal rule under the header.

    Returns
    -------
    tuple[dict, dict]
        ``results``: name -> {"Accuracy", "F1 Score", "Precision"};
        ``trained_models``: name -> fitted estimator.
    """
    results = {}
    trained_models = {}

    header_fmt = f"{{:<{name_width}}} {{:<10}} {{:<10}} {{:<10}}"
    # Fixed-width numeric fields so rows line up under the header columns.
    row_fmt = f"{{:<{name_width}}} {{:<10.4f}} {{:<10.4f}} {{:<10.4f}}"

    print(f"\nTraining with {method_name}")
    print(header_fmt.format("Model", "Accuracy", "F1 Score", "Precision"))
    print("-" * rule_width)

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # 'weighted' averaging yields a single score on multi-class problems.
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        precision = precision_score(y_test, y_pred, average="weighted")

        results[name] = {"Accuracy": accuracy, "F1 Score": f1, "Precision": precision}
        trained_models[name] = model

        print(row_fmt.format(name, accuracy, f1, precision))

    return results, trained_models


def train_classifiers(X_train, X_test, y_train, y_test, method_name):
    """Train a broad suite of classifiers on dense/tabular features.

    Parameters and return value are as in :func:`_fit_and_report`.
    """
    models = {
        "Naive Bayes": GaussianNB(),
        "Logistic Regression": LogisticRegression(max_iter=5000),  # raised max_iter to converge
        "KNN": KNeighborsClassifier(n_neighbors=5),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(kernel="linear"),
        "XGBoost": XGBClassifier(eval_metric="logloss"),
        "LightGBM": LGBMClassifier(force_col_wise=True, verbose=-1),
        "AdaBoost": AdaBoostClassifier(n_estimators=50, random_state=42),
        "MLP Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500,
                                            random_state=42),
    }
    return _fit_and_report(models, X_train, X_test, y_train, y_test, method_name,
                           name_width=25, rule_width=60)


def train_NLP(X_train, X_test, y_train, y_test, method_name):
    """Train classifiers suited to text-derived (often sparse) features.

    Parameters and return value are as in :func:`_fit_and_report`.
    """
    # Cast to float32: halves memory for large text matrices and satisfies
    # estimators that dislike float64/object input.
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    models = {
        "Naive Bayes (Multinomial)": MultinomialNB(),
        "Naive Bayes (Complement)": ComplementNB(),
        "Naive Bayes (Bernoulli)": BernoulliNB(),
        "Passive Aggressive Classifier": PassiveAggressiveClassifier(max_iter=1000,
                                                                     random_state=42),
        "Ridge Classifier": RidgeClassifier(),
        # NOTE(review): the display name says "Linear SVM", but loss="log_loss"
        # makes this SGD-trained *logistic regression*; a linear SVM would use
        # loss="hinge". Name kept byte-identical so downstream result keys match.
        "SGD Classifier (Linear SVM)": SGDClassifier(loss="log_loss", max_iter=1000,
                                                     random_state=42),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(kernel="linear"),
        "XGBoost": XGBClassifier(eval_metric="logloss"),
        "LightGBM": LGBMClassifier(force_col_wise=True, verbose=-1),
        "AdaBoost": AdaBoostClassifier(n_estimators=50, random_state=42),
        "MLP Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500,
                                            random_state=42),
    }
    return _fit_and_report(models, X_train, X_test, y_train, y_test, method_name,
                           name_width=30, rule_width=70)