from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.naive_bayes import MultinomialNB, ComplementNB, BernoulliNB
from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
import pandas as pd
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")
import numpy as np
def train_classifiers(X_train, X_test, y_train, y_test, method_name):
    """Train a suite of standard classifiers and report test-set metrics.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices for training and evaluation.
    y_train, y_test : array-like
        Target labels for training and evaluation.
    method_name : str
        Label for the preprocessing/feature-extraction method; used only
        in the printed report header.

    Returns
    -------
    tuple
        ``(results, trained_models)`` where ``results`` maps model name to
        a dict of ``Accuracy`` / ``F1 Score`` / ``Precision`` values and
        ``trained_models`` maps model name to the fitted estimator.
    """
    models = {
        "Naive Bayes": GaussianNB(),
        # High max_iter so lbfgs converges on harder feature sets.
        "Logistic Regression": LogisticRegression(max_iter=5000),
        "KNN": KNeighborsClassifier(n_neighbors=5),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(kernel='linear'),
        "XGBoost": XGBClassifier(eval_metric='logloss'),
        "LightGBM": LGBMClassifier(force_col_wise=True, verbose=-1),
        "AdaBoost": AdaBoostClassifier(n_estimators=50, random_state=42),
        "MLP Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
    }
    results = {}
    trained_models = {}
    # Train each model and evaluate it on the held-out test set.
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        # 'weighted' averaging so multi-class targets are handled.
        f1 = f1_score(y_test, y_pred, average='weighted')
        precision = precision_score(y_test, y_pred, average='weighted')
        results[name] = {"Accuracy": accuracy, "F1 Score": f1, "Precision": precision}
        trained_models[name] = model
    # Print a formatted summary table.
    print(f"\nTraining with {method_name}")
    print("{:<25} {:<10} {:<10} {:<10}".format("Model", "Accuracy", "F1 Score", "Precision"))
    print("-" * 60)
    # Fix: distinct loop variable instead of shadowing `model`, which
    # previously held fitted estimator objects, with a string key.
    for model_name, metrics in results.items():
        print("{:<25} {:.4f} {:.4f} {:.4f}".format(
            model_name, metrics["Accuracy"], metrics["F1 Score"], metrics["Precision"]
        ))
    return results, trained_models
def train_NLP(X_train, X_test, y_train, y_test, method_name):
    """Fit a panel of text-oriented classifiers and report test metrics.

    Casts the feature matrices to float32, trains each estimator in turn,
    and prints Accuracy / weighted F1 / weighted Precision per model as
    training progresses.

    Returns a ``(results, trained_models)`` pair: per-model metric dicts
    keyed by display name, and the fitted estimators keyed the same way.
    """
    # float32 halves memory for large (e.g. TF-IDF) feature matrices.
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    # NOTE(review): the "Linear SVM" entry is configured with
    # loss="log_loss", which is logistic regression; a linear SVM would be
    # loss="hinge". Confirm which was intended before trusting the label.
    estimators = {
        "Naive Bayes (Multinomial)": MultinomialNB(),
        "Naive Bayes (Complement)": ComplementNB(),
        "Naive Bayes (Bernoulli)": BernoulliNB(),
        "Passive Aggressive Classifier": PassiveAggressiveClassifier(max_iter=1000, random_state=42),
        "Ridge Classifier": RidgeClassifier(),
        "SGD Classifier (Linear SVM)": SGDClassifier(loss="log_loss", max_iter=1000, random_state=42),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(kernel='linear'),
        "XGBoost": XGBClassifier(eval_metric='logloss'),
        "LightGBM": LGBMClassifier(force_col_wise=True, verbose=-1),
        "AdaBoost": AdaBoostClassifier(n_estimators=50, random_state=42),
        "MLP Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
    }
    results = {}
    trained_models = {}
    # Table header goes out before training so per-model rows stream below it.
    print(f"\nTraining with {method_name}")
    print("{:<30} {:<10} {:<10} {:<10}".format("Model", "Accuracy", "F1 Score", "Precision"))
    print("-" * 70)
    for label, clf in estimators.items():
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        acc = accuracy_score(y_test, predictions)
        # Weighted averaging handles multi-class label distributions.
        f1_val = f1_score(y_test, predictions, average='weighted')
        prec = precision_score(y_test, predictions, average='weighted')
        results[label] = {"Accuracy": acc, "F1 Score": f1_val, "Precision": prec}
        trained_models[label] = clf
        print("{:<30} {:.4f} {:.4f} {:.4f}".format(label, acc, f1_val, prec))
    return results, trained_models
# ## Logistic Regression
# def LogisticReg(X_train,y_train,X_test,y_test):
# # Initialize and train the model
# model = LogisticRegression(max_iter=5000)
# model.fit(X_train, y_train)
# # Predict and evaluate
# y_pred = model.predict(X_test)
# print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred))
# #KNN Classifer
# def KNN(X_train,y_train,X_test,y_test):
# model = KNeighborsClassifier(n_neighbors=5)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("KNN Accuracy:", accuracy_score(y_test, y_pred))
# ## Decision Trees
# def DicitionTree(X_train,y_train,X_test,y_test):
# model = DecisionTreeClassifier()
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred))
# ## Random Forest
# def RandomForest(X_train,y_train,X_test,y_test):
# model = RandomForestClassifier(n_estimators=100, random_state=42)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("Random Forest Accuracy:", accuracy_score(y_test, y_pred))
# ## Support Vector Machine
# def SVV(X_train,y_train,X_test,y_test):
# model = SVC(kernel='linear') # Try 'rbf' for non-linear classification
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("SVM Accuracy:", accuracy_score(y_test, y_pred))
# ## Naive Bayes
# def NaivBayes(X_train,y_train,X_test,y_test):
# model = GaussianNB()
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("Naïve Bayes Accuracy:", accuracy_score(y_test, y_pred))
# ## Gradient Boosting
# def XGBoost(X_train,y_train,X_test,y_test):
# model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
# ## AdaBoost Classifier
# def AdaBoost(X_train,y_train,X_test,y_test):
# model = AdaBoostClassifier(n_estimators=50, random_state=42)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred))
# ## LightGBM (Fast Gradient Boosting)
# def LightGBM(X_train,y_train,X_test,y_test):
# model = LGBMClassifier()
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("LightGBM Accuracy:", accuracy_score(y_test, y_pred))
# ## Neural Network (MLP)
# def MLP(X_train,y_train,X_test,y_test):
# model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
# print("MLP Neural Network Accuracy:", accuracy_score(y_test, y_pred))