"""MULTI-HEAD LSTM WITH SQUEEZE AND EXCITATION.py

Multi-head LSTM with Squeeze-and-Excitation blocks for human activity
recognition on the WISDM dataset.
"""
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Concatenate,
                                     GlobalAveragePooling1D, Layer)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import tarfile
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Suppress TensorFlow C++ log noise (note: this variable is read when
# TensorFlow is first imported, so for full effect set it before the imports)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Enable memory growth for the GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        logger.error(f"GPU error: {e}")

# Custom Squeeze-and-Excitation layer: learns a weight per channel from a
# global summary of the sequence and rescales the input channels accordingly.
class SqueezeExciteBlock(Layer):
    def __init__(self, ratio=16, **kwargs):
        super(SqueezeExciteBlock, self).__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        channels = input_shape[-1]
        # Squeeze: collapse the time axis to one value per channel
        self.global_avg_pool = GlobalAveragePooling1D()
        # Excitation: bottleneck MLP (channels -> channels // ratio -> channels)
        self.dense1 = Dense(channels // self.ratio,
                            activation='relu',
                            kernel_initializer='he_normal',
                            use_bias=False)
        self.dense2 = Dense(channels,
                            activation='sigmoid',
                            kernel_initializer='he_normal',
                            use_bias=False)
        super(SqueezeExciteBlock, self).build(input_shape)

    def excitation_weights(self, inputs):
        # Per-channel weights in (0, 1), shape (batch, 1, channels). Exposed
        # as a separate method so the visualization code below can inspect
        # them (the inner Dense layers are not functional-graph nodes).
        x = self.global_avg_pool(inputs)
        x = tf.expand_dims(x, axis=1)
        x = self.dense1(x)
        return self.dense2(x)

    def call(self, inputs):
        # Scale each input channel by its learned weight (broadcast over time)
        return inputs * self.excitation_weights(inputs)

    def get_config(self):
        config = super(SqueezeExciteBlock, self).get_config()
        config.update({"ratio": self.ratio})
        return config
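# Quick sanity check (a minimal sketch on random data; the _se_* names are
# illustrative): the block must preserve the input shape, since it only
# rescales channels.
_se_demo = tf.random.normal((2, 200, 64))
_se_out = SqueezeExciteBlock(ratio=16)(_se_demo)
assert _se_out.shape == _se_demo.shape, "SE block should preserve input shape"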

# Standalone multi-head LSTM builder. Note: it is unused below, since
# build_multi_head_lstm_se defines its own SE-augmented variant; kept as a
# reusable building block.
class MultiHeadLSTM:
    def __init__(self, units, num_heads):
        self.units = units
        self.num_heads = num_heads
        
    def __call__(self, inputs):
        lstm_heads = []
        for _ in range(self.num_heads):
            lstm = LSTM(self.units, return_sequences=True)(inputs)
            lstm = Dropout(0.5)(lstm)
            lstm_heads.append(lstm)
        return lstm_heads
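# Usage sketch for MultiHeadLSTM (illustrative only; the model below builds
# its own SE-augmented heads instead):
#   heads = MultiHeadLSTM(units=50, num_heads=3)(inputs)  # list of 3 tensors
#   merged = Concatenate()(heads)                         # (batch, time, 150)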

logger.info("Starting data preprocessing...")

# Paths to the dataset archive and extraction directory (adjust to your local copy)
dataset_path = r'C:\Users\LENOVO LEGION\Desktop\ml codes\ML CODES WISDIM\WISDM_ar_latest.tar.gz'
extract_path = r'C:\Users\LENOVO LEGION\Desktop\ml codes\ML CODES WISDIM\WISDM_ar_latest'

# Extract the dataset
if not os.path.exists(extract_path):
    logger.info("Extracting dataset...")
    with tarfile.open(dataset_path, 'r:gz') as tar:
        tar.extractall(path=extract_path)

# Define the path to the main dataset file
data_file = os.path.join(extract_path, 'WISDM_ar_v1.1', 'WISDM_ar_v1.1_raw.txt')

# Load the dataset, skipping bad lines
logger.info("Loading dataset...")
column_names = ['user', 'activity', 'timestamp', 'x', 'y', 'z']
wisdm_data = pd.read_csv(data_file, header=None, names=column_names, on_bad_lines='skip')

logger.info(f"Initial dataset shape: {wisdm_data.shape}")

# Data Cleaning
# The raw file terminates each record with ';' and contains some malformed
# rows. Strip the terminator, then coerce to numeric: errors='coerce' turns
# anything non-numeric into NaN while, unlike a digits-only check, keeping
# the negative accelerometer readings.
for axis in ['x', 'y', 'z']:
    wisdm_data[axis] = wisdm_data[axis].astype(str).str.replace(';', '', regex=False)
    wisdm_data[axis] = pd.to_numeric(wisdm_data[axis], errors='coerce')

# Drop rows with missing or malformed values
wisdm_data = wisdm_data.dropna()

logger.info(f"Dataset shape after cleaning: {wisdm_data.shape}")

# Feature Engineering
logger.info("Performing feature engineering...")

# Calculate magnitude
wisdm_data['magnitude'] = np.sqrt(wisdm_data['x']**2 + wisdm_data['y']**2 + wisdm_data['z']**2)
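# e.g. a reading of (x, y, z) = (3.0, 4.0, 0.0) has magnitude sqrt(9 + 16 + 0) = 5.0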

# Calculate jerk (first difference of acceleration over time). The difference
# runs over the whole frame, so values at user/activity boundaries are not
# physically meaningful, but they are rare and absorbed by the later
# standardization. WISDM timestamps are phone uptime in nanoseconds (per the
# dataset readme), so the raw jerk scale is arbitrary until scaling.
for axis in ['x', 'y', 'z']:
    diff = np.diff(wisdm_data[axis])
    time_diff = np.diff(wisdm_data['timestamp'])
    jerk = np.zeros(len(wisdm_data))
    jerk[1:] = np.where(time_diff != 0, diff / time_diff, 0)  # guard zero/duplicate timestamps
    wisdm_data[f'{axis}_jerk'] = jerk

# Calculate rolling mean and standard deviation
window_size = 20
for axis in ['x', 'y', 'z']:
    wisdm_data[f'{axis}_rolling_mean'] = wisdm_data.groupby('user')[axis].rolling(window=window_size).mean().reset_index(level=0, drop=True)
    wisdm_data[f'{axis}_rolling_std'] = wisdm_data.groupby('user')[axis].rolling(window=window_size).std().reset_index(level=0, drop=True)

# Replace infinities, then fill the NaNs left by diff/rolling warm-up
# (fillna(method=...) is deprecated in recent pandas; use ffill/bfill)
wisdm_data = wisdm_data.replace([np.inf, -np.inf], np.nan).ffill().bfill()

# Map activity labels to integers
activity_mapping = {label: idx for idx, label in enumerate(wisdm_data['activity'].unique())}
wisdm_data['activity'] = wisdm_data['activity'].map(activity_mapping)

# Reverse mapping for later use
reverse_activity_mapping = {v: k for k, v in activity_mapping.items()}
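# WISDM v1.1 has six activities (Walking, Jogging, Upstairs, Downstairs,
# Sitting, Standing), so activity_mapping ends up as e.g.
# {'Walking': 0, 'Jogging': 1, ...}; the exact indices depend on order of
# first appearance in the file.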

# Normalize features
logger.info("Normalizing features...")
scaler = StandardScaler()
features = ['x', 'y', 'z', 'magnitude', 'x_jerk', 'y_jerk', 'z_jerk', 
            'x_rolling_mean', 'y_rolling_mean', 'z_rolling_mean', 
            'x_rolling_std', 'y_rolling_std', 'z_rolling_std']
wisdm_data[features] = scaler.fit_transform(wisdm_data[features])
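# StandardScaler maps each feature to z = (v - mu) / sigma. Note that the
# scaler is fitted on the full dataset before the train/test split, so test
# statistics leak into training; fitting on the training split only would be
# the stricter approach.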

# Create fixed-length sliding windows; each window is labeled with the
# activity of its final sample
def create_sequences(data, seq_length, step=1):
    sequences = []
    labels = []
    for start in range(0, len(data) - seq_length, step):
        sequences.append(data.iloc[start:start + seq_length][features].values)
        labels.append(data.iloc[start + seq_length - 1]['activity'])
    return np.array(sequences), np.array(labels)
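# Window-count arithmetic (illustrative): for N rows, window length L and
# stride s, the loop yields ceil((N - L) / s) windows. E.g. N = 1_000_000,
# L = 200, s = 1 gives 999,800 heavily overlapping windows, i.e. roughly
# 999_800 * 200 * 13 * 8 bytes ~ 21 GB as float64; raising step (e.g. 50)
# trims both memory and redundancy.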

# Create sequences from the data
sequence_length = 200
logger.info("Creating sequences...")
X, y = create_sequences(wisdm_data, sequence_length)
logger.info(f"Shape of X after sequence creation: {X.shape}")
logger.info(f"Shape of y after sequence creation: {y.shape}")

# Final check for any NaN or infinite values
if np.isnan(X).any() or np.isinf(X).any():
    logger.error("NaN or infinite values detected in the final dataset")
    raise ValueError("Dataset contains NaN or infinite values after preprocessing")

# Convert labels to categorical
y_categorical = to_categorical(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)
logger.info(f"Training set shape: {X_train.shape}")
logger.info(f"Testing set shape: {X_test.shape}")

# Multi-head LSTM with SE model definition
def build_multi_head_lstm_se(input_shape, num_classes, num_heads=3):
    def multi_head_lstm(input_layer, num_heads, units):
        lstm_heads = []
        for _ in range(num_heads):
            lstm = LSTM(units, return_sequences=True)(input_layer)
            lstm = Dropout(0.5)(lstm)
            lstm = SqueezeExciteBlock()(lstm)
            lstm_heads.append(lstm)
        return Concatenate()(lstm_heads)

    # Input layer
    inputs = Input(shape=input_shape)
    
    # Multi-head LSTM with SE blocks
    x = multi_head_lstm(inputs, num_heads=num_heads, units=50)
    
    # Global pooling
    x = GlobalAveragePooling1D()(x)
    
    # Dense layers
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)
    
    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)
    
    # Create model
    model = Model(inputs=inputs, outputs=outputs)
    return model
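# Shape walk-through for the defaults (batch dimension B omitted from names):
#   inputs              -> (B, 200, 13)
#   each LSTM head      -> (B, 200, 50)   # return_sequences=True
#   SE block (per head) -> (B, 200, 50)   # shape preserved, channels rescaled
#   Concatenate         -> (B, 200, 150)  # 3 heads x 50 units
#   GlobalAvgPooling1D  -> (B, 150)
#   Dense(100)+Dropout  -> (B, 100)
#   Dense(num_classes)  -> (B, num_classes)  # 6 activities for WISDM v1.1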

def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name="Multi-head LSTM with SE"):
    # Compile model
    model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
    
    # Define callbacks
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                 factor=0.2,
                                 patience=5,
                                 min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss',
                                 patience=10,
                                 restore_best_weights=True)
    
    # Train model
    logger.info(f"\nTraining {model_name}...")
    history = model.fit(X_train, y_train,
                       epochs=20,
                       batch_size=64,
                       validation_split=0.2,
                       callbacks=[reduce_lr, early_stopping],
                       verbose=1)
    
    # Evaluate model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    y_pred = model.predict(X_test)
    
    return model, history, accuracy, y_pred

def plot_se_weights(model, X_sample, num_heads=3, model_name="Multi-head LSTM with SE"):
    # The SE layers themselves are nodes in the functional graph, so their
    # inputs can be probed with an intermediate Model; their inner Dense
    # sublayers are not (they have no inbound nodes), which is why the
    # weights are recomputed via excitation_weights() instead of reading
    # dense2.output.
    se_layers = [layer for layer in model.layers if isinstance(layer, SqueezeExciteBlock)]

    plt.figure(figsize=(15, 5 * num_heads))

    for i, se_layer in enumerate(se_layers[:num_heads]):
        # Run one sample up to this SE layer's input, then compute its weights
        probe = Model(inputs=model.input, outputs=se_layer.input)
        head_input = probe.predict(X_sample[:1])
        se_weights = se_layer.excitation_weights(head_input).numpy()  # (1, 1, C)

        plt.subplot(num_heads, 1, i + 1)
        # One global pooling yields a single weight per channel (no time axis)
        plt.bar(range(se_weights.shape[-1]), se_weights[0, 0])
        plt.title(f'Head {i+1} SE Channel Weights')
        plt.xlabel('LSTM Channel')
        plt.ylabel('Weight')

    plt.tight_layout()
    plt.savefig(f'se_weights_{model_name.lower().replace(" ", "_")}.png')
    plt.close()

def plot_channel_relationships(model, X_test, model_name="Multi-head LSTM with SE"):
    # The SE weights live over each head's LSTM output channels (50 hidden
    # units), not over the 13 raw input features, so the relationships are
    # computed and labeled per LSTM channel rather than per input feature.
    se_layers = [layer for layer in model.layers if isinstance(layer, SqueezeExciteBlock)]
    num_channels = int(se_layers[0].input.shape[-1])

    channel_correlations = np.zeros((num_channels, num_channels))

    for se_layer in se_layers:
        # Probe the head output feeding this SE layer, then recompute weights
        probe = Model(inputs=model.input, outputs=se_layer.input)
        head_input = probe.predict(X_test[:100])  # use the first 100 samples
        se_weights = se_layer.excitation_weights(head_input).numpy()  # (100, 1, C)

        # Correlate the per-channel weights across samples (C x C matrix)
        channel_correlations += np.corrcoef(se_weights[:, 0, :].T)

    # Average correlations across heads
    channel_correlations /= len(se_layers)

    # Plot channel relationships (annotations omitted: a 50x50 grid is unreadable)
    plt.figure(figsize=(12, 10))
    sns.heatmap(channel_correlations,
                cmap='RdBu_r',
                center=0)
    plt.title(f'{model_name} - SE Channel Relationships')
    plt.xlabel('LSTM Channel')
    plt.ylabel('LSTM Channel')
    plt.tight_layout()
    plt.savefig(f'channel_relationships_{model_name.lower().replace(" ", "_")}.png')
    plt.close()

    return channel_correlations

def plot_training_history(history, model_name="Multi-head LSTM with SE"):
    plt.figure(figsize=(15, 5))
    
    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{model_name} - Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{model_name} - Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(f'training_history_{model_name.lower().replace(" ", "_")}.png')
    plt.close()

def plot_confusion_matrix(y_true, y_pred, model_name="Multi-head LSTM with SE"):
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_true, axis=1)
    
    cm = confusion_matrix(y_true_classes, y_pred_classes)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} - Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    # Set x and y tick labels to activity names
    tick_marks = np.arange(len(reverse_activity_mapping))
    plt.xticks(tick_marks + 0.5, [reverse_activity_mapping[i] for i in range(len(reverse_activity_mapping))], 
               rotation=45, ha='right')
    plt.yticks(tick_marks + 0.5, [reverse_activity_mapping[i] for i in range(len(reverse_activity_mapping))], 
               rotation=0)
    
    plt.tight_layout()
    plt.savefig(f'confusion_matrix_{model_name.lower().replace(" ", "_")}.png')
    plt.close()
    
    return cm

def save_model_results(model, history, accuracy, cm, channel_correlations, model_name="Multi-head LSTM with SE"):
    with open(f'{model_name.lower().replace(" ", "_")}_results.txt', 'w') as f:
        # Model architecture
        f.write(f"{model_name} Architecture\n")
        f.write("="*50 + "\n\n")
        model.summary(print_fn=lambda x: f.write(x + '\n'))
        f.write("\n")
        
        # Performance metrics
        f.write("Performance Metrics\n")
        f.write("-"*50 + "\n")
        f.write(f"Test Accuracy: {accuracy*100:.2f}%\n\n")
        
        # Confusion Matrix
        f.write("Confusion Matrix\n")
        f.write("-"*50 + "\n")
        np.savetxt(f, cm, fmt='%d')
        f.write("\n")
        
        # Channel Relationships (SE weights over LSTM output channels)
        f.write("Channel Relationship Analysis\n")
        f.write("-"*50 + "\n")
        f.write("Top 5 strongest SE channel relationships (LSTM channels):\n")
        num_channels = channel_correlations.shape[0]
        relationships = []
        for i in range(num_channels):
            for j in range(i + 1, num_channels):
                relationships.append((i, j, channel_correlations[i, j]))

        for ch1, ch2, corr in sorted(relationships, key=lambda x: abs(x[2]), reverse=True)[:5]:
            f.write(f"channel {ch1} - channel {ch2}: {corr:.4f}\n")
        f.write("\n")
        
        # Training history
        f.write("Training History\n")
        f.write("-"*50 + "\n")
        f.write("Epoch\tLoss\tAccuracy\tVal_Loss\tVal_Accuracy\n")
        for i in range(len(history.history['loss'])):
            f.write(f"{i+1}\t{history.history['loss'][i]:.4f}\t")
            f.write(f"{history.history['accuracy'][i]:.4f}\t")
            f.write(f"{history.history['val_loss'][i]:.4f}\t")
            f.write(f"{history.history['val_accuracy'][i]:.4f}\n")

# Main execution (note: the preprocessing above runs at import time; this
# guard only protects model building, training, and evaluation)
if __name__ == "__main__":
    logger.info("Starting Multi-head LSTM with SE model training and evaluation...")
    
    # Create and compile model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = y_train.shape[1]
    num_heads = 3
    model = build_multi_head_lstm_se(input_shape, num_classes, num_heads)
    
    # Print model summary
    model.summary()
    
    # Train and evaluate model
    model, history, accuracy, y_pred = train_and_evaluate_model(
        model, X_train, y_train, X_test, y_test
    )
    
    # Generate visualizations and metrics
    cm = plot_confusion_matrix(y_test, y_pred)
    plot_training_history(history)
    plot_se_weights(model, X_test, num_heads)
    channel_correlations = plot_channel_relationships(model, X_test)
    
    # Save results
    save_model_results(model, history, accuracy, cm, channel_correlations)
    
    # Save model
    model.save('multihead_lstm_se_wisdm.h5')
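    # Reload sketch: Keras needs the custom layer class to deserialize it
    #   reloaded = tf.keras.models.load_model(
    #       'multihead_lstm_se_wisdm.h5',
    #       custom_objects={'SqueezeExciteBlock': SqueezeExciteBlock})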
    
    logger.info(f"\nMulti-head LSTM with SE Results:")
    logger.info(f"Test Accuracy: {accuracy*100:.2f}%")
    logger.info("Model and results have been saved.")