import os
import tarfile
import logging

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Attention, LayerNormalization,
                                     Concatenate, GlobalAveragePooling1D)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger()

# Suppress TensorFlow GPU warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Enable memory growth for the GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        logger.error(f"GPU error: {e}")


# Custom multi-head LSTM layer (defined for reference; the model builder below creates its heads inline)
class MultiHeadLSTM:
    def __init__(self, units, num_heads):
        self.units = units
        self.num_heads = num_heads

    def __call__(self, inputs):
        lstm_heads = []
        for _ in range(self.num_heads):
            lstm = LSTM(self.units, return_sequences=True)(inputs)
            lstm = Dropout(0.5)(lstm)
            lstm_heads.append(lstm)
        return lstm_heads


logger.info("Starting data preprocessing...")

# Path to the dataset file
dataset_path = r'C:\Users\LENOVO LEGION\Desktop\ml codes\ML CODES WISDIM\WISDM_ar_latest.tar.gz'
extract_path = r'C:\Users\LENOVO LEGION\Desktop\ml codes\ML CODES WISDIM\WISDM_ar_latest'

# Extract the dataset
if not os.path.exists(extract_path):
    logger.info("Extracting dataset...")
    with tarfile.open(dataset_path, 'r:gz') as tar:
        tar.extractall(path=extract_path)

# Define the path to the main dataset file
data_file = os.path.join(extract_path, 'WISDM_ar_v1.1', 'WISDM_ar_v1.1_raw.txt')

# Load the dataset, skipping bad lines
logger.info("Loading dataset...")
column_names = ['user', 'activity', 'timestamp', 'x', 'y', 'z']
wisdm_data = pd.read_csv(data_file, header=None, names=column_names, on_bad_lines='skip')
logger.info(f"Initial dataset shape: {wisdm_data.shape}")

# Data Cleaning
# Convert all values to strings
wisdm_data['x'] = wisdm_data['x'].astype(str)
wisdm_data['y'] = wisdm_data['y'].astype(str)
wisdm_data['z'] = wisdm_data['z'].astype(str)

# Remove non-numeric characters (trailing semicolons in the raw file)
wisdm_data['x'] = wisdm_data['x'].str.replace(';', '', regex=False)
wisdm_data['y'] = wisdm_data['y'].str.replace(';', '', regex=False)
wisdm_data['z'] = wisdm_data['z'].str.replace(';', '', regex=False)

# Remove rows with non-numeric values
# (strip a leading minus sign before the digit check so negative accelerometer readings are kept)
wisdm_data = wisdm_data[wisdm_data['x'].apply(lambda x: x.lstrip('-').replace('.', '', 1).isdigit())]
wisdm_data = wisdm_data[wisdm_data['y'].apply(lambda y: y.lstrip('-').replace('.', '', 1).isdigit())]
wisdm_data = wisdm_data[wisdm_data['z'].apply(lambda z: z.lstrip('-').replace('.', '', 1).isdigit())]

# Convert columns back to numeric
wisdm_data['x'] = pd.to_numeric(wisdm_data['x'])
wisdm_data['y'] = pd.to_numeric(wisdm_data['y'])
wisdm_data['z'] = pd.to_numeric(wisdm_data['z'])

# Handle missing values
wisdm_data = wisdm_data.dropna()
logger.info(f"Dataset shape after cleaning: {wisdm_data.shape}")

# Feature Engineering
logger.info("Performing feature engineering...")

# Calculate magnitude
wisdm_data['magnitude'] = np.sqrt(wisdm_data['x']**2 + wisdm_data['y']**2 + wisdm_data['z']**2)
# Calculate jerk (derivative of acceleration)
for axis in ['x', 'y', 'z']:
    diff = np.diff(wisdm_data[axis])
    time_diff = np.diff(wisdm_data['timestamp'])
    jerk = np.zeros(len(wisdm_data))
    jerk[1:] = np.where(time_diff != 0, diff / time_diff, 0)
    wisdm_data[f'{axis}_jerk'] = jerk

# Calculate rolling mean and standard deviation
window_size = 20
for axis in ['x', 'y', 'z']:
    wisdm_data[f'{axis}_rolling_mean'] = wisdm_data.groupby('user')[axis].rolling(window=window_size).mean().reset_index(level=0, drop=True)
    wisdm_data[f'{axis}_rolling_std'] = wisdm_data.groupby('user')[axis].rolling(window=window_size).std().reset_index(level=0, drop=True)

# Handle NaN and infinite values
wisdm_data = wisdm_data.replace([np.inf, -np.inf], np.nan).ffill().bfill()

# Map activity labels to integers
activity_mapping = {label: idx for idx, label in enumerate(wisdm_data['activity'].unique())}
wisdm_data['activity'] = wisdm_data['activity'].map(activity_mapping)

# Reverse mapping for later use
reverse_activity_mapping = {v: k for k, v in activity_mapping.items()}

# Normalize features
logger.info("Normalizing features...")
scaler = StandardScaler()
features = ['x', 'y', 'z', 'magnitude',
            'x_jerk', 'y_jerk', 'z_jerk',
            'x_rolling_mean', 'y_rolling_mean', 'z_rolling_mean',
            'x_rolling_std', 'y_rolling_std', 'z_rolling_std']
wisdm_data[features] = scaler.fit_transform(wisdm_data[features])


# Create sequences
def create_sequences(data, seq_length, step=1):
    sequences = []
    labels = []
    for start in range(0, len(data) - seq_length, step):
        sequences.append(data.iloc[start:start + seq_length][features].values)
        labels.append(data.iloc[start + seq_length - 1]['activity'])
    return np.array(sequences), np.array(labels)


# Create sequences from the data
sequence_length = 200
logger.info("Creating sequences...")
X, y = create_sequences(wisdm_data, sequence_length)
logger.info(f"Shape of X after sequence creation: {X.shape}")
logger.info(f"Shape of y after sequence creation: {y.shape}")

# Final check for any NaN or infinite values
if np.isnan(X).any() or np.isinf(X).any():
    logger.error("NaN or infinite values detected in the final dataset")
    raise ValueError("Dataset contains NaN or infinite values after preprocessing")

# Convert labels to categorical
y_categorical = to_categorical(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)
logger.info(f"Training set shape: {X_train.shape}")
logger.info(f"Testing set shape: {X_test.shape}")


# Multi-head LSTM with Attention Model definition
def build_multi_head_lstm_attention(input_shape, num_classes, num_heads=3):
    def multi_head_lstm(input_layer, num_heads, units):
        lstm_heads = []
        for _ in range(num_heads):
            lstm = LSTM(units, return_sequences=True)(input_layer)
            lstm_heads.append(lstm)
        return Concatenate()(lstm_heads)

    # Input layer
    inputs = Input(shape=input_shape)

    # Multi-head LSTM (use the num_heads argument rather than a hard-coded value)
    x = multi_head_lstm(inputs, num_heads=num_heads, units=50)
    x = Dropout(0.5)(x)

    # Attention mechanism
    attention = Attention()([x, x])
    attention = LayerNormalization()(attention)

    # Combine with attention
    x = Concatenate()([x, attention])
    x = GlobalAveragePooling1D()(x)
    x = Dense(100, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)

    # Create model
    model = Model(inputs=inputs, outputs=outputs)
    return model


def train_and_evaluate_model(model, X_train, y_train, X_test, y_test,
                             model_name="Multi-head LSTM with Attention"):
    # Compile model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    # Define callbacks
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train model
    logger.info(f"\nTraining {model_name}...")
    history = model.fit(X_train, y_train,
                        epochs=20,
                        batch_size=64,
                        validation_split=0.2,
                        callbacks=[reduce_lr, early_stopping],
                        verbose=1)

    # Evaluate model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    y_pred = model.predict(X_test)

    return model, history, accuracy, y_pred


def plot_head_attention_weights(model, X_sample, num_heads=3, model_name="Multi-head LSTM with Attention"):
    # Get the output of each LSTM head (layers 1..num_heads immediately follow the input layer)
    attention_outputs = []
    for i in range(num_heads):
        head_output = model.layers[i + 1].output  # Skip input layer
        attention_model = Model(inputs=model.input, outputs=head_output)
        attention_outputs.append(attention_model.predict(X_sample[:1]))

    # Plot each head's activations over time for one sample
    fig, axes = plt.subplots(num_heads, 1, figsize=(15, 5 * num_heads))
    for i, attention in enumerate(attention_outputs):
        im = axes[i].imshow(attention[0].T, aspect='auto', cmap='viridis')
        axes[i].set_title(f'Head {i + 1} Attention Weights')
        axes[i].set_xlabel('Time Step')
        axes[i].set_ylabel('Features')
        plt.colorbar(im, ax=axes[i])
    plt.tight_layout()
    plt.savefig(f'head_attention_weights_{model_name.lower().replace(" ", "_")}.png')
    plt.close()


def plot_head_contributions(model, X_test, y_test, num_heads=3, model_name="Multi-head LSTM with Attention"):
    # Measure each head's mean absolute activation per activity class
    y_true = np.argmax(y_test, axis=1)
    head_contributions = np.zeros((len(activity_mapping), num_heads))

    for activity_idx in range(len(activity_mapping)):
        activity_mask = (y_true == activity_idx)
        if np.any(activity_mask):
            activity_samples = X_test[activity_mask]
            for head_idx in range(num_heads):
                head_output = model.layers[head_idx + 1].output
                head_model = Model(inputs=model.input, outputs=head_output)
                head_activations = head_model.predict(activity_samples)
                head_contributions[activity_idx, head_idx] = np.mean(np.abs(head_activations))

    # Plot heatmap of head contributions
    plt.figure(figsize=(10, 8))
    sns.heatmap(head_contributions,
                xticklabels=[f'Head {i + 1}' for i in range(num_heads)],
                yticklabels=[reverse_activity_mapping[i] for i in range(len(activity_mapping))],
                cmap='viridis', annot=True, fmt='.2f')
    plt.title(f'{model_name} - Head Contributions per Activity')
    plt.tight_layout()
    plt.savefig(f'head_contributions_{model_name.lower().replace(" ", "_")}.png')
    plt.close()

    return head_contributions


def plot_training_history(history, model_name="Multi-head LSTM with Attention"):
    plt.figure(figsize=(15, 5))

    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'{model_name} - Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'{model_name} - Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig(f'training_history_{model_name.lower().replace(" ", "_")}.png')
    plt.close()


def plot_confusion_matrix(y_true, y_pred, model_name="Multi-head LSTM with Attention"):
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_true, axis=1)
    cm = confusion_matrix(y_true_classes, y_pred_classes)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} - Confusion Matrix')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    # Set x and y tick labels to activity names
    tick_marks = np.arange(len(reverse_activity_mapping))
    plt.xticks(tick_marks + 0.5,
               [reverse_activity_mapping[i] for i in range(len(reverse_activity_mapping))],
               rotation=45, ha='right')
    plt.yticks(tick_marks + 0.5,
               [reverse_activity_mapping[i] for i in range(len(reverse_activity_mapping))],
               rotation=0)

    plt.tight_layout()
    plt.savefig(f'confusion_matrix_{model_name.lower().replace(" ", "_")}.png')
    plt.close()

    return cm


def save_model_results(model, history, accuracy, cm, head_contributions,
                       model_name="Multi-head LSTM with Attention"):
    with open(f'{model_name.lower().replace(" ", "_")}_results.txt', 'w') as f:
        # Model architecture
        f.write(f"{model_name} Architecture\n")
        f.write("=" * 50 + "\n\n")
        model.summary(print_fn=lambda x: f.write(x + '\n'))
        f.write("\n")

        # Performance metrics
        f.write("Performance Metrics\n")
        f.write("-" * 50 + "\n")
        f.write(f"Test Accuracy: {accuracy * 100:.2f}%\n\n")

        # Confusion Matrix
        f.write("Confusion Matrix\n")
        f.write("-" * 50 + "\n")
        np.savetxt(f, cm, fmt='%d')
        f.write("\n")

        # Head Contributions
        f.write("Head Contributions per Activity\n")
        f.write("-" * 50 + "\n")
        for i in range(len(activity_mapping)):
            f.write(f"{reverse_activity_mapping[i]}:\n")
            for h in range(head_contributions.shape[1]):
                f.write(f" Head {h + 1}: {head_contributions[i, h]:.4f}\n")
        f.write("\n")

        # Training history
        f.write("Training History\n")
        f.write("-" * 50 + "\n")
        f.write("Epoch\tLoss\tAccuracy\tVal_Loss\tVal_Accuracy\n")
        for i in range(len(history.history['loss'])):
            f.write(f"{i + 1}\t{history.history['loss'][i]:.4f}\t")
            f.write(f"{history.history['accuracy'][i]:.4f}\t")
            f.write(f"{history.history['val_loss'][i]:.4f}\t")
            f.write(f"{history.history['val_accuracy'][i]:.4f}\n")


# Main execution
if __name__ == "__main__":
    logger.info("Starting Multi-head LSTM with Attention model training and evaluation...")

    # Create and compile model
    input_shape = (X_train.shape[1], X_train.shape[2])
    num_classes = y_train.shape[1]
    num_heads = 3
    model = build_multi_head_lstm_attention(input_shape, num_classes, num_heads)

    # Print model summary
    model.summary()

    # Train and evaluate model
    model, history, accuracy, y_pred = train_and_evaluate_model(
        model, X_train, y_train, X_test, y_test
    )

    # Generate visualizations and metrics
    cm = plot_confusion_matrix(y_test, y_pred)
    plot_training_history(history)
    plot_head_attention_weights(model, X_test, num_heads)
    head_contributions = plot_head_contributions(model, X_test, y_test, num_heads)

    # Save results
    save_model_results(model, history, accuracy, cm, head_contributions)

    # Save model
    model.save('multihead_lstm_attention_wisdm.h5')

    logger.info("\nMulti-head LSTM with Attention Results:")
    logger.info(f"Test Accuracy: {accuracy * 100:.2f}%")
    logger.info("Model and results have been saved.")