################################################################################ # CSE 253: Programming Assignment 4 # Code snippet by Ajit Kumar, Savyasachi # Fall 2020 ################################################################################ import sys import matplotlib.pyplot as plt import numpy as np import torch import torch.nn as nn from datetime import datetime from constants import ROOT_STATS_DIR, GRADIENT_ACCUMULATE from dataset_factory import get_datasets from model_factory import get_model from file_utils import * import warnings import json from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence import torchvision.transforms as transforms from torch.cuda.amp import GradScaler, autocast from datasets import load_metric warnings.filterwarnings("ignore", category=UserWarning) # Class to encapsulate a neural experiment. # The boilerplate code to setup the experiment, log stats, checkpoints and plotting have been provided to you. # You only need to implement the main training logic of your experiment and implement train, val and test methods. # You are free to modify or restructure the code as per your convenience. class Experiment(object): def __init__(self, name): config_data = read_file_in_dir(sys.path[0], name + '.json') if config_data is None: raise Exception("Configuration file doesn't exist: ", name) self.__config_data = config_data self.__name = config_data['experiment_name'] self.__experiment_dir = os.path.join(ROOT_STATS_DIR, self.__name) # Load Datasets self.__vocab, self.__train_loader, self.__val_loader, self.__test_loader = get_datasets(config_data) # Setup Experiment self.__generation_config = config_data['generation'] self.__epochs = config_data['experiment']['num_epochs'] self.__current_epoch = 0 self.__training_losses = [] self.__val_losses = [] self.__best_model = None # Save your best model in this field and use this in test method. # Init Model self.__model = get_model(config_data, self.__vocab) self.__criterion = nn.CrossEntropyLoss() self.__optimizer = torch.optim.Adam(self.__model.parameters(), lr=self.__config_data['experiment']['learning_rate']) self.__scaler = GradScaler() self.__init_model() # Load Experiment Data if available self.__load_experiment() # Loads the experiment data if exists to resume training from last saved checkpoint. def __load_experiment(self): os.makedirs(ROOT_STATS_DIR, exist_ok=True) if os.path.exists(self.__experiment_dir): self.__training_losses = read_file_in_dir(self.__experiment_dir, 'training_losses.txt') self.__val_losses = read_file_in_dir(self.__experiment_dir, 'val_losses.txt') self.__current_epoch = len(self.__training_losses) state_dict = torch.load(os.path.join(self.__experiment_dir, 'latest_model.pt')) self.__model.load_state_dict(state_dict['model']) self.__optimizer.load_state_dict(state_dict['optimizer']) else: os.makedirs(self.__experiment_dir) def __init_model(self): if torch.cuda.is_available(): self.__model = self.__model.cuda().float() self.__criterion = self.__criterion.cuda() # Main method to run your experiment. Should be self-explanatory. def run(self): start_epoch = self.__current_epoch for epoch in range(start_epoch, self.__epochs): # loop over the dataset multiple times start_time = datetime.now() self.__current_epoch = epoch train_loss = self.__train() val_loss = self.__val() self.__record_stats(train_loss, val_loss) self.__log_epoch_stats(start_time) self.__save_model() def convert_question(self, prediction): """ Converts predicted question indices to word tokens prediction: N x Q """ word_idxs = prediction.cpu().numpy() captions = [] for i in range(prediction.shape[0]): words = [self.__vocab.idx2word[idx].lower() for idx in word_idxs[i]] try: end_idx = words.index('<end>') + 1 # cut off after predicting end except ValueError as e: end_idx = None words = words[:end_idx] captions.append(words) to_return = [] for i in range(len(captions)): clean_list = ['<pad>', '<start>', '<end>', '<unk>', ' ', ';', ',', '.', '\'', '-', '(', ')', '[', ']', '@', '$', \ '%', '!', '?', '/', '+', '^', '&', '*'] cleaned_caption = [word for word in captions[i] if word not in clean_list] to_return.append(cleaned_caption) return to_return def __train(self): self.__model.train() training_loss = 0 gradient_accumulation = GRADIENT_ACCUMULATE for i, (passages, answers, questions) in enumerate(self.__train_loader): if torch.cuda.is_available: passages = passages.cuda().long() answers = answers.cuda().long() questions = questions.cuda().long() with autocast(): out_seq = self.__model(passages, answers, questions) # N x Q x vocab_size loss = self.__criterion(out_seq.permute(0, 2, 1), questions) self.__scaler.scale(loss / gradient_accumulation).backward() if (i+1) % gradient_accumulation == 0: # Update every k batches instead of every batch, allows for smaller batch sizes self.__scaler.step(self.__optimizer) self.__scaler.update() self.__optimizer.zero_grad() batch_loss = loss.sum().item() / questions.shape[1] training_loss += batch_loss if i % 100 == 0: print("Batch {} Loss: {}".format(i, batch_loss)) training_loss /= len(self.__train_loader) return training_loss def __val(self): self.__model.eval() val_loss = 0 with torch.no_grad(): for i, (passages, answers, questions) in enumerate(self.__val_loader): if torch.cuda.is_available: passages = passages.cuda().long() answers = answers.cuda().long() questions = questions.cuda().long() with autocast(): out_seq = self.__model(passages, answers, questions) loss = self.__criterion(out_seq.permute(0, 2, 1), questions) batch_loss = loss.sum().item() / questions.shape[1] val_loss += batch_loss val_loss /= len(self.__val_loader) if len(self.__val_losses) == 0: self.__best_model = self.__model.state_dict() torch.save(self.__model.state_dict(), os.path.join(self.__experiment_dir, 'best_model.pth')) elif val_loss < min(self.__val_losses): self.__best_model = self.__model.state_dict() torch.save(self.__model.state_dict(), os.path.join(self.__experiment_dir, 'best_model.pth')) return val_loss def test(self): self.__model.eval() test_loss = 0 meteor_score = 0 rougeL_score = 0 bleu1_score = 0 bleu4_score = 0 model = get_model(self.__config_data, self.__vocab) model.load_state_dict(torch.load(os.path.join(self.__experiment_dir, 'best_model.pth'))) model.temperature = self.__config_data['generation']['temperature'] model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")) model.eval() meteor = load_metric("meteor") rouge = load_metric("rouge") bleu = load_metric("bleu") with torch.no_grad(): for i, (passages, answers, questions) in enumerate(self.__test_loader): if torch.cuda.is_available: passages = passages.cuda().long() answers = answers.cuda().long() questions = questions.cuda().long() with autocast(): out_seq = model(passages, answers, questions) # N x Q loss = self.__criterion(out_seq.permute(0, 2, 1), questions) batch_loss = loss.sum().item() / questions.shape[1] test_loss += batch_loss # Metric Evaluation predictions = model.predict(passages, answers) # N x Q predictions = self.convert_question(predictions) # list of lists of tokens true_questions = self.convert_question(questions) # list of lists of tokens bleu_list = [[elem] for elem in true_questions] bleu1_score += bleu.compute(predictions=predictions, references=bleu_list, max_order=1)['bleu'] bleu4_score += bleu.compute(predictions=predictions, references= bleu_list, max_order=4)['bleu'] predicted_strings = [' '.join(elem) for elem in predictions] true_strings = [' '.join(elem) for elem in true_questions] meteor_score += meteor.compute(predictions=predicted_strings, references=true_strings)['meteor'] rougeL_score += rouge.compute(predictions=predicted_strings, references=true_strings)['rougeL'].mid.fmeasure test_loss /= len(self.__test_loader) perp = np.exp(test_loss) # Normalize metric scores bleu1_score /= len(self.__test_loader) bleu4_score /= len(self.__test_loader) meteor_score /= len(self.__test_loader) rougeL_score /= len(self.__test_loader) result_str = "Test Performance: Loss: {}, Perplexity: {}, Bleu1: {}, Bleu4: {}, Meteor: {}, Rouge-L: {}".format( test_loss, perp, bleu1_score, bleu4_score, meteor_score, rougeL_score) self.__log(result_str) dic = {'Test Loss': test_loss, 'Perplexity': perp, 'BLEU1': bleu1_score, 'BLEU4': bleu4_score, 'METEOR': meteor_score, 'ROUGE-L': rougeL_score} with open(os.path.join(self.__experiment_dir, 'results.json'), 'w') as f: json.dump(dic, f) return test_loss, bleu1_score, bleu4_score def __save_model(self): root_model_path = os.path.join(self.__experiment_dir, 'latest_model.pt') model_dict = self.__model.state_dict() state_dict = {'model': model_dict, 'optimizer': self.__optimizer.state_dict()} torch.save(state_dict, root_model_path) def __record_stats(self, train_loss, val_loss): self.__training_losses.append(train_loss) self.__val_losses.append(val_loss) self.plot_stats() write_to_file_in_dir(self.__experiment_dir, 'training_losses.txt', self.__training_losses) write_to_file_in_dir(self.__experiment_dir, 'val_losses.txt', self.__val_losses) def __log(self, log_str, file_name=None): print(log_str) log_to_file_in_dir(self.__experiment_dir, 'all.log', log_str) if file_name is not None: log_to_file_in_dir(self.__experiment_dir, file_name, log_str) def __log_epoch_stats(self, start_time): time_elapsed = datetime.now() - start_time time_to_completion = time_elapsed * (self.__epochs - self.__current_epoch - 1) train_loss = self.__training_losses[self.__current_epoch] val_loss = self.__val_losses[self.__current_epoch] summary_str = "Epoch: {}, Train Loss: {}, Val Loss: {}, Took {}, ETA: {}\n" summary_str = summary_str.format(self.__current_epoch + 1, train_loss, val_loss, str(time_elapsed), str(time_to_completion)) self.__log(summary_str, 'epoch.log') def plot_stats(self): e = len(self.__training_losses) x_axis = np.arange(1, e + 1, 1) plt.figure() plt.plot(x_axis, self.__training_losses, label="Training Loss") plt.plot(x_axis, self.__val_losses, label="Validation Loss") plt.xlabel("Epochs") plt.legend(loc='best') plt.title(self.__name + " Stats Plot") plt.savefig(os.path.join(self.__experiment_dir, "stat_plot.png")) plt.show()