import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Attention, LayerNormalization, Concatenate, GlobalAveragePooling1D, multiply, Layer, Reshape
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
# Suppress TensorFlow GPU warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Paths to the dataset files
dataset_dir_path = r'C:\Users\LENOVO LEGION\Downloads\human+activity+recognition+using+smartphones\UCI HAR Dataset\UCI HAR Dataset'
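# NOTE: machine-specific path; point dataset_dir_path at your local copy of the
# extracted 'UCI HAR Dataset' folder.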
# Load and preprocess data
def load_data():
    # Load the feature names (delim_whitespace is deprecated in pandas; use sep=r'\s+')
    features = pd.read_csv(os.path.join(dataset_dir_path, 'features.txt'), sep=r'\s+', header=None, names=['index', 'feature'])
    # Load the activity labels
    activity_labels = pd.read_csv(os.path.join(dataset_dir_path, 'activity_labels.txt'), sep=r'\s+', header=None, names=['index', 'activity'])
    # Load training data
    X_train = pd.read_csv(os.path.join(dataset_dir_path, 'train', 'X_train.txt'), sep=r'\s+', header=None)
    y_train = pd.read_csv(os.path.join(dataset_dir_path, 'train', 'y_train.txt'), sep=r'\s+', header=None, names=['activity'])
    # Load test data
    X_test = pd.read_csv(os.path.join(dataset_dir_path, 'test', 'X_test.txt'), sep=r'\s+', header=None)
    y_test = pd.read_csv(os.path.join(dataset_dir_path, 'test', 'y_test.txt'), sep=r'\s+', header=None, names=['activity'])
    # Label the columns
    X_train.columns = features['feature']
    X_test.columns = features['feature']
    # Standardize the feature data (fit on train only to avoid test-set leakage)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Reshape for the LSTMs as (samples, time steps, features); note that a
    # single time step treats each 561-feature window as one sequence element
    X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
    X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))
    # Encode activity labels (1-6 in the raw files) as 0-based integers
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(y_train['activity'])
    y_test_encoded = encoder.transform(y_test['activity'])
    # Convert to one-hot vectors
    y_train_categorical = to_categorical(y_train_encoded)
    y_test_categorical = to_categorical(y_test_encoded)
    return X_train_reshaped, y_train_categorical, X_test_reshaped, y_test_categorical, activity_labels, X_train_scaled
# Load data
X_train, y_train, X_test, y_test, activity_labels, X_train_scaled = load_data()
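# Sanity check (optional): with the standard UCI HAR split this yields
# X_train (7352, 1, 561), X_test (2947, 1, 561), and one-hot labels over
# the 6 activity classes.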
# Define model architectures
# 1. Simple LSTM
def simple_lstm(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(100, input_shape=input_shape, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model
# 2. Deep LSTM
def deep_lstm(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(100, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model
# 3. LSTM with Attention
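# Keras' Attention layer computes dot-product (Luong-style) attention; passing
# [x, x] uses the LSTM outputs as both query and value, i.e. self-attention
# over the time axis. The attended features are concatenated back onto the raw
# LSTM outputs before pooling.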
def lstm_attention(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = LSTM(100, return_sequences=True)(inputs)
    x = Dropout(0.5)(x)
    x = LSTM(100, return_sequences=True)(x)
    x = Dropout(0.5)(x)
    attention = Attention()([x, x])
    attention = LayerNormalization()(attention)
    x = Concatenate()([x, attention])
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    return Model(inputs, outputs)
# 4. Multi-Head LSTM with Attention
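# "Multi-head" here means several independent LSTM branches over the same
# input, concatenated along the feature axis (not the multi-head attention of
# Transformers); a shared Attention layer then operates on the combined
# representation.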
def multi_head_lstm_attention(input_shape, num_classes, num_heads=3):
    inputs = Input(shape=input_shape)
    lstm_outs = []
    for _ in range(num_heads):
        lstm = LSTM(50, return_sequences=True)(inputs)
        lstm_outs.append(lstm)
    x = Concatenate()(lstm_outs)
    x = Dropout(0.5)(x)
    attention = Attention()([x, x])
    attention = LayerNormalization()(attention)
    x = Concatenate()([x, attention])
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    return Model(inputs, outputs)
# 5. Multi-Head LSTM with SE blocks
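# Squeeze-and-Excitation (Hu et al., 2018): global-average-pool the sequence
# ("squeeze"), pass it through a bottleneck of width channels // ratio, and
# emit per-channel sigmoid gates ("excite") that rescale the original features.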
class SqueezeExciteBlock(Layer):
    def __init__(self, ratio=16, **kwargs):
        super(SqueezeExciteBlock, self).__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        # Create sublayers here (rather than in call) so they are built once
        channels = input_shape[-1]
        self.global_avg_pool = GlobalAveragePooling1D()
        self.reshape = Reshape((1, channels))
        self.dense1 = Dense(channels // self.ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)
        self.dense2 = Dense(channels, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)
        super(SqueezeExciteBlock, self).build(input_shape)

    def call(self, inputs):
        # Squeeze: pool over time; excite: per-channel gates in (0, 1)
        x = self.global_avg_pool(inputs)
        x = self.reshape(x)
        x = self.dense1(x)
        x = self.dense2(x)
        # Rescale the input channels by the learned gates
        return multiply([inputs, x])

    def get_config(self):
        config = super(SqueezeExciteBlock, self).get_config()
        config.update({"ratio": self.ratio})
        return config
def multi_head_lstm_se(input_shape, num_classes, num_heads=3):
    inputs = Input(shape=input_shape)
    lstm_outs = []
    for _ in range(num_heads):
        lstm = LSTM(100, return_sequences=True)(inputs)
        lstm = Dropout(0.5)(lstm)
        lstm = SqueezeExciteBlock()(lstm)
        lstm_outs.append(lstm)
    x = Concatenate()(lstm_outs)
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    return Model(inputs, outputs)
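# Quick shape check (an optional sketch): each builder should map the HAR
# input to a 6-way softmax, e.g.
#   m = multi_head_lstm_se((1, 561), 6)
#   assert m.output_shape == (None, 6)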
# Function to train and evaluate models
def train_and_evaluate(model, X_train, y_train, X_test, y_test, model_name):
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2,
                        verbose=1, callbacks=[reduce_lr, early_stopping])
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    precision = precision_score(y_true, y_pred_classes, average='weighted')
    recall = recall_score(y_true, y_pred_classes, average='weighted')
    f1 = f1_score(y_true, y_pred_classes, average='weighted')
    print(f'{model_name} Test Accuracy: {accuracy*100:.2f}%')
    return history, accuracy, precision, recall, f1, y_true, y_pred_classes
# Define and train models
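# Results vary from run to run; for more repeatable experiments you could seed
# the RNGs before building the models (a suggestion, not part of the original
# pipeline; full determinism also depends on hardware and backend):
#   import tensorflow as tf
#   np.random.seed(42)
#   tf.random.set_seed(42)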
input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = y_train.shape[1]
models = {
    'Simple LSTM': simple_lstm(input_shape, num_classes),
    'Deep LSTM': deep_lstm(input_shape, num_classes),
    'LSTM with Attention': lstm_attention(input_shape, num_classes),
    'Multi-Head LSTM with Attention': multi_head_lstm_attention(input_shape, num_classes),
    'Multi-Head LSTM with SE': multi_head_lstm_se(input_shape, num_classes)
}
results = {}
for name, model in models.items():
    results[name] = train_and_evaluate(model, X_train, y_train, X_test, y_test, name)
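# Optionally persist each trained model for later reuse (hypothetical
# filenames; reloading the SE variant needs
# custom_objects={'SqueezeExciteBlock': SqueezeExciteBlock}):
#   for name, m in models.items():
#       m.save(f"{name.replace(' ', '_').lower()}.keras")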
# Visualization functions
def plot_accuracy_epochs(histories):
    plt.figure(figsize=(12, 6))
    for name, history in histories.items():
        plt.plot(history.history['accuracy'], label=f'{name} - Training')
        plt.plot(history.history['val_accuracy'], label=f'{name} - Validation')
    plt.title('Accuracy vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
def plot_loss_epochs(histories):
    plt.figure(figsize=(12, 6))
    for name, history in histories.items():
        plt.plot(history.history['loss'], label=f'{name} - Training')
        plt.plot(history.history['val_loss'], label=f'{name} - Validation')
    plt.title('Loss vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
def plot_confusion_matrix(y_true, y_pred, labels, title):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix - {title}')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.xticks(ticks=np.arange(len(labels)) + 0.5, labels=labels, rotation=45)
    plt.yticks(ticks=np.arange(len(labels)) + 0.5, labels=labels, rotation=0)
    plt.tight_layout()
    plt.show()
def plot_metrics_comparison(results):
    metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    model_names = list(results.keys())
    values = np.array([[res[1], res[2], res[3], res[4]] for res in results.values()])
    plt.figure(figsize=(12, 8))
    sns.heatmap(values.T, annot=True, fmt='.3f', cmap='YlOrRd',
                xticklabels=model_names, yticklabels=metrics)
    plt.title('Model Performance Metrics Heatmap')
    plt.tight_layout()
    plt.show()
# Generate visualizations
histories = {name: res[0] for name, res in results.items()}
plot_accuracy_epochs(histories)
plot_loss_epochs(histories)
for name, res in results.items():
    plot_confusion_matrix(res[5], res[6], activity_labels['activity'], name)
plot_metrics_comparison(results)
# Box plot of accuracies
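# Each box summarizes one model's per-epoch validation accuracy; the series
# have different lengths when early stopping cuts a run short.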
plt.figure(figsize=(10, 6))
accuracies = [res[0].history['val_accuracy'] for res in results.values()]
plt.boxplot(accuracies, labels=list(results.keys()))
plt.title('Validation Accuracy Comparison')
plt.ylabel('Accuracy')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# Recognition error over time
plt.figure(figsize=(12, 6))
for name, history in histories.items():
    recognition_error = 1 - np.array(history.history['accuracy'])
    plt.plot(recognition_error, label=name)
plt.title('Recognition Error Over Time')
plt.ylabel('Recognition Error')
plt.xlabel('Epoch')
plt.legend()
plt.show()
# Correlation matrix
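# With 561 standardized features this is a 561x561 matrix; cell annotations
# are disabled (annot=False) to keep the figure renderable.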
correlation_matrix = pd.DataFrame(X_train_scaled).corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, cmap='coolwarm', annot=False)
plt.title('Correlation Matrix of Features')
plt.show()
# Comparison between accuracy and epoch
plt.figure(figsize=(12, 6))
for name, history in histories.items():
    plt.plot(history.history['accuracy'], label=f'{name} - Training')
    plt.plot(history.history['val_accuracy'], label=f'{name} - Validation')
plt.title('Comparison between Accuracy and Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Print classification reports
for name, res in results.items():
    print(f"\nClassification Report for {name}:")
    print(classification_report(res[5], res[6], target_names=activity_labels['activity']))
# F1 Score, Precision, and Recall comparison
plt.figure(figsize=(12, 6))
metrics = ['Precision', 'Recall', 'F1 Score']
x = np.arange(len(models))
width = 0.25
for i, metric in enumerate(metrics):
    values = [res[i+2] for res in results.values()]
    plt.bar(x + i*width, values, width, label=metric)
plt.xlabel('Models')
plt.ylabel('Score')
plt.title('Precision, Recall, and F1 Score Comparison')
plt.xticks(x + width, list(models.keys()), rotation=45, ha='right')
plt.legend()
plt.tight_layout()
plt.show()
# Training and Validation loss figure for each model
for name, history in histories.items():
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Training and Validation Loss - {name}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
# Main execution block
if __name__ == "__main__":
    print("Human Activity Recognition Model Comparison")
    print("===========================================")
    # Print overall results
    for name, res in results.items():
        print(f"\n{name}:")
        print(f" Accuracy: {res[1]:.4f}")
        print(f" Precision: {res[2]:.4f}")
        print(f" Recall: {res[3]:.4f}")
        print(f" F1 Score: {res[4]:.4f}")
    print("\nVisualization Summary:")
    print("1. Accuracy vs Epochs plot")
    print("2. Loss vs Epochs plot")
    print("3. Confusion Matrix for each model")
    print("4. Model Performance Metrics Heatmap")
    print("5. Validation Accuracy Comparison (Box Plot)")
    print("6. Recognition Error Over Time")
    print("7. Correlation Matrix of Features")
    print("8. Comparison between Accuracy and Epoch")
    print("9. Precision, Recall, and F1 Score Comparison")
    print("10. Training and Validation Loss for each model")
    print("\nClassification Reports:")
    for name, res in results.items():
        print(f"\nClassification Report for {name}:")
        print(classification_report(res[5], res[6], target_names=activity_labels['activity']))
    # Save results to file
    with open('har_model_comparison_results.txt', 'w') as f:
        for name, res in results.items():
            f.write(f"\n{name}:\n")
            f.write(f" Accuracy: {res[1]:.4f}\n")
            f.write(f" Precision: {res[2]:.4f}\n")
            f.write(f" Recall: {res[3]:.4f}\n")
            f.write(f" F1 Score: {res[4]:.4f}\n")
            f.write("\nClassification Report:\n")
            f.write(classification_report(res[5], res[6], target_names=activity_labels['activity']))
            f.write("\n" + "="*50 + "\n")
    print("\nResults have been saved to 'har_model_comparison_results.txt'")