# Inspired by https://github.com/cezannec/CNN_Text_Classification/blob/master/CNN_Text_Classification.ipynb
import embeddings
import torch
import torch.nn as nn

from run_sentiment import encode_sentiment_data
from datasets import load_dataset


class SentimentCNN(nn.Module):
    def __init__(
        self,
        embedding_dim,
        feature_map_size=100,
        kernel_sizes=(3, 4, 5),
        freeze_embeddings=True,
        drop_prob=0.5,
    ):
        """
        Initialize the model by setting up the layers.
        """
        super().__init__()

        # set class vars
        self.feature_map_size = feature_map_size
        self.embedding_dim = embedding_dim

        # 1. embedding layer <-- disabled for now since we don't retrain the embeddings
        # self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # set weights to pre-trained vectors
        # self.embedding.weight = nn.Parameter(torch.from_numpy(embed_model.vectors))
        # (optional) freeze embedding weights
        # if freeze_embeddings:
        #     self.embedding.requires_grad = False

        # 2. convolutional layers, one per kernel size
        self.convs = nn.ModuleList(
            [nn.Conv1d(embedding_dim, feature_map_size, k) for k in kernel_sizes]
        )

        # 3. final, fully-connected layer for classification
        self.fc = nn.Linear(len(kernel_sizes) * feature_map_size, 1)

        # 4. dropout and sigmoid layers
        self.dropout = nn.Dropout(drop_prob)
        self.sig = nn.Sigmoid()

    def conv_and_pool(self, x, conv):
        """
        Convolution + global max pooling over the sequence dimension.
        """
        # conv output size: (batch_size, feature_map_size, conv_seq_length)
        x = nn.functional.relu(conv(x))
        # max over the sequence dim -> (batch_size, feature_map_size)
        x_max = x.max(dim=2)[0]
        return x_max

    def forward(self, embeds):
        """
        Defines how a batch of pre-computed embeddings passes through the model
        layers; returns a single, sigmoid-activated class score per example.
        """
        # permute embedding dim to input channels: (batch_size, in_channels, seq_length)
        x = embeds.permute(0, 2, 1)

        # get output of each conv-pool layer
        conv_results = [self.conv_and_pool(x, conv) for conv in self.convs]

        # concatenate results and add dropout
        x = torch.cat(conv_results, 1)
        x = self.dropout(x)

        # final logit
        logit = self.fc(x)

        # sigmoid-activated --> a class score
        return self.sig(logit)


# training loop
def train(
    model, data_train, data_val, learning_rate=0.001, max_epochs=50, batch_size=128
):
    # loss and optimization functions
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    (X_train, y_train) = data_train
    (X_val, y_val) = data_val

    n_training_samples = len(X_train)
    batch_size = min(batch_size, n_training_samples)

    model.train()
    for epoch in range(1, max_epochs + 1):
        # batch loop
        train_correct = 0
        train_loss = 0
        n_batches = 0
        for start in range(0, n_training_samples, batch_size):
            y = torch.tensor(y_train[start : start + batch_size])
            x = torch.tensor(X_train[start : start + batch_size])
            model.zero_grad()

            # get the output from the model
            output = model(x)

            # calculate the loss and perform backprop
            loss = criterion(output.squeeze(), y.float())
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            n_batches += 1
            # count a prediction as correct when the thresholded score matches the label
            for i, y_true in enumerate(y):
                if (output[i].item() >= 0.5) == bool(y_true):
                    train_correct += 1

        # evaluate on the validation set after each epoch
        model.eval()
        with torch.no_grad():
            y = torch.tensor(y_val)
            x = torch.tensor(X_val)
            output = model(x)
            val_loss = criterion(output.squeeze(), y.float())
        val_correct = 0
        for i, y_true in enumerate(y):
            if (output[i].item() >= 0.5) == bool(y_true):
                val_correct += 1
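
        # switch dropout back on for the next epoch's batches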
        model.train()

        print(f"Epoch: {epoch}/{max_epochs}...")
        print(
            f"Train loss: {train_loss / n_batches:.6f}...",
            f"Train correct: {train_correct}/{len(X_train)}",
            f"Train accuracy: {train_correct / len(X_train):.2%}",
        )
        print(
            f"Val loss: {val_loss.item():.6f}",
            f"Val correct: {val_correct}/{len(X_val)}",
            f"Val accuracy: {val_correct / len(X_val):.2%}",
        )
        print()


if __name__ == "__main__":
    EMBEDDING_SIZE = 50
    (X_train, y_train), (X_val, y_val) = encode_sentiment_data(
        load_dataset("glue", "sst2"),
        embeddings.GloveEmbedding(
            "wikipedia_gigaword", d_emb=EMBEDDING_SIZE, show_progress=True
        ),
        2500,
        250,
    )
    print("X_train size:", len(X_train))
    print("X_val size:", len(X_val))
    model = SentimentCNN(EMBEDDING_SIZE, kernel_sizes=[3, 4, 5])
    train(
        model,
        (X_train, y_train),
        (X_val, y_val),
        learning_rate=0.001,
        max_epochs=1000,
    )
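
    # Minimal post-training inference sketch: assumes X_val holds embedded
    # sequences shaped (n, seq_len, EMBEDDING_SIZE), matching what the
    # training loop above feeds the model.
    model.eval()
    with torch.no_grad():
        scores = model(torch.tensor(X_val)).squeeze(1)  # sigmoid scores in [0, 1]
        preds = (scores >= 0.5).long()  # 1 = positive sentiment, 0 = negative
    print("First 10 val predictions:", preds[:10].tolist())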