{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multi-task problem: colored USPS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torchvision\n",
    "import torchvision.transforms as transforms\n",
    "from torch.utils.data import DataLoader, random_split, TensorDataset\n",
    "\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Colored USPS Dataset\n",
    "* Handwritten digits with 10 classes\n",
    "* Size of each image: 16x16 pixels \n",
    "* 6 000 data examples in training set, 1 291 examples in validation set, 2 007 in test set\n",
    "* We colorize each image with a random color within 5 (red, green, blue, magenta, yellow)\n",
    "* Each image has two labels: the number it represents (10 classes) and the color of the number (5 classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Given a loaded USPS dataset, create a colozied version of it\n",
    "def colorize_dataset(dataset):\n",
    "    # array of colors\n",
    "    COLORS = torch.tensor([\n",
    "        [1.0, 0.0, 0.0], # 0 RED\n",
    "        [0.0, 1.0, 0.0], # 1 GREEN\n",
    "        [0.0, 0.0, 1.0], # 2 BLUE\n",
    "        [1.0, 1.0, 0.0], # 3 YELLOW\n",
    "        [1.0, 0.0, 1.0], # 4 MAGENTA\n",
    "    ])\n",
    "    N = len(dataset)\n",
    "    images = torch.tensor(dataset.data/255).view(N, 1, 16, 16)\n",
    "    labels = torch.tensor(dataset.targets).view(N, 1)\n",
    "    color_labels = torch.randint(0, 5, (N,))\n",
    "    colorized_images = images * COLORS[color_labels, :].view(N,3,1,1)\n",
    "    full_labels = torch.cat((labels, color_labels.view(N, 1)), dim=1)\n",
    "    return TensorDataset(colorized_images, full_labels)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading MNIST dataset from torchvision.dataset\n",
    "dataset = torchvision.datasets.USPS(root='USPS/',\n",
    "                                           train=True,\n",
    "                                           transform=transforms.ToTensor(),\n",
    "                                           download=False)\n",
    "\n",
    "dataset = colorize_dataset(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"The shape of the dataset is :\", dataset.tensors[0].size())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# split the dataset into training and validation sets\n",
    "train_set, val_set = random_split(dataset, [6000, 1291])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_index = 18\n",
    "\n",
    "plt.imshow(dataset[sample_index][0].permute(1, 2, 0), plt.cm.gray_r)\n",
    "plt.title(\"image label: {}\".format(dataset[sample_index][1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training the neural network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model(nn.Module):\n",
    "    \n",
    "    def __init__(self):\n",
    "        super(Model, self).__init__()\n",
    "        # The input size is 16*16. For a standard classification task, the output size should be the same as the number of classes\n",
    "        self.l_number = nn.Linear(16*16*3, 10)\n",
    "        # However here we deal with 2 tasks: the network outputs 2 labels, so there are two \"last\" layers in parallel\n",
    "        self.l_color = nn.Linear(16*16*3, 5)\n",
    "        \n",
    "    def forward(self, inputs):\n",
    "        h = inputs.view(-1, 16*16*3)\n",
    "        # Use softmax as the activation function for the last layer(s)\n",
    "        output_number = F.softmax(self.l_number(h), dim=1)\n",
    "        output_color = F.softmax(self.l_color(h), dim=1)\n",
    "        \n",
    "        return (output_number, output_color)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the model: \n",
    "model = Model()\n",
    "\n",
    "# Choose the hyperparameters for training: \n",
    "num_epochs = 10\n",
    "batch_size = 10\n",
    "\n",
    "# Use mean squared loss function \n",
    "criterion = nn.MSELoss()\n",
    "\n",
    "# Use SGD optimizer with a learning rate of 0.01\n",
    "# It is initialized on our model\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define a function for training\n",
    "def train(num_epochs, batch_size, criterion, optimizer, model, dataset):\n",
    "    train_error = []\n",
    "    train_loader = DataLoader(dataset, batch_size, shuffle=True)\n",
    "    model.train()\n",
    "    for epoch in range(num_epochs):\n",
    "        epoch_average_loss = 0.0\n",
    "        for (images, labels) in train_loader:\n",
    "            (y_number, y_color) = model(images.float())\n",
    "            \n",
    "            # One-hot encoding or labels so as to calculate MSE error:\n",
    "            number_onehot = F.one_hot(labels[:,0], 10).float()\n",
    "            color_onehot = F.one_hot(labels[:,1], 5).float()\n",
    "            \n",
    "            loss = criterion(y_number, number_onehot) + criterion(y_color, color_onehot)\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            epoch_average_loss += loss.item() * batch_size / len(dataset)\n",
    "        train_error.append(epoch_average_loss)\n",
    "        print('Epoch [{}/{}], Loss: {:.4f}'\n",
    "                      .format(epoch+1, num_epochs, epoch_average_loss))\n",
    "    return train_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_error = train(num_epochs, batch_size, criterion, optimizer, model, train_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot the training error wrt. the number of epochs: \n",
    "plt.plot(range(1, num_epochs+1), train_error)\n",
    "plt.xlabel(\"num_epochs\")\n",
    "plt.ylabel(\"Train error\")\n",
    "plt.title(\"Visualization of convergence\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluate the model on the validation set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the accuracy to evaluate the model\n",
    "def accuracy(dataset, model):\n",
    "\n",
    "    with torch.no_grad():\n",
    "        number_correct = 0\n",
    "        color_correct = 0\n",
    "        both_correct = 0\n",
    "        dataloader = DataLoader(dataset)\n",
    "        for images, labels in dataloader:\n",
    "            images = images.view(-1, 16*16)\n",
    "            (y_number, y_color) = model(images.float())\n",
    "            _, number_predicted = torch.max(y_number.data, 1) \n",
    "            number_correct += (number_predicted == labels[:, 0]).sum()\n",
    "            _, color_predicted = torch.max(y_color.data, 1) \n",
    "            color_correct += (color_predicted == labels[:, 1]).sum()\n",
    "            both_correct += ((color_predicted == labels[:, 1]) and (number_predicted == labels[:, 0])).sum()\n",
    "\n",
    "    print('Accuracy of the model for numbers : {:.2f} %'.format(100*number_correct.item()/ len(dataset)))\n",
    "    print('Accuracy of the model for colors : {:.2f} %'.format(100*color_correct.item()/ len(dataset)))\n",
    "    print('Accuracy of the model for both : {:.2f} %'.format(100*both_correct.item()/ len(dataset)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy(val_set, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "val_index = 66\n",
    "\n",
    "(image, label) = val_set[val_index]\n",
    "(y_number, y_color) = model(image.float())\n",
    "_, number_prediction = torch.max(y_number.data, 1)\n",
    "_, color_prediction = torch.max(y_color.data, 1)\n",
    "\n",
    "plt.imshow(image.permute(1, 2, 0), interpolation='nearest')\n",
    "plt.title(\"Prediction: number=%d, color=%d\" % (number_prediction, color_prediction))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 1: Impact of the architecture of the model\n",
    "Define your own class `Model` to improve the predictions:\n",
    "\n",
    "* The convolutional layer can be a good choice to deal with images. Replace nn.Linear with [nn.Conv2d](https://pytorch.org/docs/stable/nn.html#conv2d).\n",
    "* Try to add more layers (1, 2, 3, more ?)\n",
    "* Change the number of neurons in hidden layers (5, 10, 20, more ?)\n",
    "* Try different activation functions such as [sigmoid](https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.sigmoid), [tanh](https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.tanh), [relu](https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.relu), etc.\n",
    "* __Your network generates two different outputs, how much weight-sharing (i.e. how many common layers) between these two paths is appropriate?__"
   ]
  },
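  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Below is a minimal sketch of one possible architecture (the layer sizes are only an assumption, not the required solution): a small shared convolutional trunk followed by two task-specific linear heads, so both tasks reuse the same features. It keeps the final softmax so it can be trained with the existing MSE-based `train` function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: a shared convolutional trunk with two task-specific heads.\n",
    "# All layer sizes below are illustrative assumptions.\n",
    "class ConvModel(nn.Module):\n",
    "    \n",
    "    def __init__(self):\n",
    "        super(ConvModel, self).__init__()\n",
    "        # Shared layers: both tasks see the same features\n",
    "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)   # (N, 3, 16, 16) -> (N, 16, 16, 16)\n",
    "        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)  # (N, 16, 8, 8)  -> (N, 32, 8, 8)\n",
    "        # Task-specific heads, one per output\n",
    "        self.head_number = nn.Linear(32 * 4 * 4, 10)\n",
    "        self.head_color = nn.Linear(32 * 4 * 4, 5)\n",
    "        \n",
    "    def forward(self, inputs):\n",
    "        h = F.max_pool2d(F.relu(self.conv1(inputs)), 2)  # -> (N, 16, 8, 8)\n",
    "        h = F.max_pool2d(F.relu(self.conv2(h)), 2)       # -> (N, 32, 4, 4)\n",
    "        h = h.view(h.size(0), -1)\n",
    "        output_number = F.softmax(self.head_number(h), dim=1)\n",
    "        output_color = F.softmax(self.head_color(h), dim=1)\n",
    "        return (output_number, output_color)\n",
    "\n",
    "# Usage: model = ConvModel(), then re-create the optimizer and call train(...) as before"
   ]
  },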
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 2: Impact of the optimizer\n",
    "Retrain the model by using different parameters of the optimizer; you can change its parameters in the cell initializing it, after the definition of your model.\n",
    "\n",
    "* Use different batch sizes, from 10 to 1 000 for instance\n",
    "* Try different values of the learning rate (between 0.001 and 10), and see how these impact the training process. Do all network architectures react the same way to different learning rates?\n",
    "* Change the duration of the training by increasing the number of epochs\n",
    "* Try other optimizers, such as [Adam](https://pytorch.org/docs/stable/optim.html?highlight=adam#torch.optim.Adam) or [RMSprop](https://pytorch.org/docs/stable/optim.html?highlight=rmsprop#torch.optim.RMSprop)"
   ]
  },
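  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One possible configuration to try (the values below are only examples, not a recommended setting): a larger batch size, more epochs, and the Adam optimizer instead of plain SGD."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example configuration only: adjust these values and compare the training curves\n",
    "model = Model()\n",
    "\n",
    "num_epochs = 20\n",
    "batch_size = 100\n",
    "\n",
    "criterion = nn.MSELoss()\n",
    "\n",
    "# Adam adapts the step size per parameter and often converges faster than plain SGD\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "train_error = train(num_epochs, batch_size, criterion, optimizer, model, train_set)"
   ]
  },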
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 3: Impact of the loss function\n",
    "The MSE error is rarely used in this case. The cross entropy loss can be a better choice for multi-classification problems. In pytorch, the cross entropy loss is defined by [nn.CrossEntropyLoss](https://pytorch.org/docs/stable/nn.html#crossentropyloss). Replace the MSE loss by this one to observe its impact.\n",
    "\n",
    "**Note:** In order to use nn.CrossEntropyLoss correctly, don't add an activation function to the last layer of your network. And one-hot encoding is no longer needed to calculate the loss, delete the encoding procedures in function `train`.   "
   ]
  },
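  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of the change, assuming the model's last layers now return raw logits (no softmax in `forward`): `nn.CrossEntropyLoss` applies log-softmax internally and takes the integer class labels directly, so the one-hot encoding disappears from the training loop."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch only: training loop adapted to nn.CrossEntropyLoss\n",
    "# (assumes the model outputs raw logits, i.e. no softmax on the last layers)\n",
    "def train_ce(num_epochs, batch_size, criterion, optimizer, model, dataset):\n",
    "    train_error = []\n",
    "    train_loader = DataLoader(dataset, batch_size, shuffle=True)\n",
    "    model.train()\n",
    "    for epoch in range(num_epochs):\n",
    "        epoch_average_loss = 0.0\n",
    "        for (images, labels) in train_loader:\n",
    "            (y_number, y_color) = model(images.float())\n",
    "            # CrossEntropyLoss takes class indices directly: no one-hot encoding needed\n",
    "            loss = criterion(y_number, labels[:, 0]) + criterion(y_color, labels[:, 1])\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            epoch_average_loss += loss.item() * batch_size / len(dataset)\n",
    "        train_error.append(epoch_average_loss)\n",
    "        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, epoch_average_loss))\n",
    "    return train_error\n",
    "\n",
    "# Usage: criterion = nn.CrossEntropyLoss(), then call train_ce(...) with a logits-only model"
   ]
  },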
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 4: Prediction on test set\n",
    "\n",
    "Once you have a model that seems satisfying on the validation dataset, you SHOULD evaluate it on a test dataset that has never been used before, to obtain a final accuracy value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading MNIST test set from torchvision.dataset\n",
    "test_set = torchvision.datasets.USPS(root='USPS/',\n",
    "                                         train=False,\n",
    "                                         transform=transforms.ToTensor(),\n",
    "                                         download=False)\n",
    "test_set = colorize_dataset(test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy(test_set, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}