{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 72 }, "colab_type": "code", "executionInfo": { "elapsed": 4145, "status": "ok", "timestamp": 1600236197763, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "1k7tLoE1Fymv", "outputId": "949f6593-de8c-4337-89d6-b5083f53cc3d" }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "import random\n", "import numpy as np\n", "#import data_process_2\n", "#from . import models\n", "#from data_process_2 import *\n", "from data_process_2 import get_CIFAR10_data, get_MUSHROOM_data\n", "from scipy.spatial import distance\n", "from models import Perceptron, SVM, Softmax, Logistic\n", "from kaggle_submission import output_submission_csv\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "AqIdthcmekYr" }, "source": [ "# Loading CIFAR-10" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9bgPpDIcekYs" }, "source": [ "In the following cells we determine the number of images for each split and load the images.\n", "<br /> \n", "TRAIN_IMAGES + VAL_IMAGES = (0, 50000]\n", ", TEST_IMAGES = 10000" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": {}, "colab_type": "code", "executionInfo": { "elapsed": 303, "status": "ok", "timestamp": 1600236238833, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "0oQBKJEbekYw" }, "outputs": [], "source": [ "# You can change these numbers for experimentation\n", "# For submission we will use the default values \n", "TRAIN_IMAGES = 40000\n", "VAL_IMAGES = 10000" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": {}, "colab_type": "code", "executionInfo": { "elapsed": 2700, "status": "ok", "timestamp": 1600236242630, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "wPToj6NIekY4" }, "outputs": [], "source": [ "data = get_CIFAR10_data(TRAIN_IMAGES, VAL_IMAGES)\n", "X_train_CIFAR, y_train_CIFAR = data['X_train'], data['y_train']\n", "X_val_CIFAR, y_val_CIFAR = data['X_val'], data['y_val']\n", "X_test_CIFAR, y_test_CIFAR = data['X_test'], data['y_test']\n", "n_class_CIFAR = len(np.unique(y_test_CIFAR))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "OycnCNYlekZA" }, "source": [ "Convert the sets of images from dimensions of **(N, 3, 32, 32) -> (N, 3072)** where N is the number of images so that each **3x32x32** image is represented by a single vector." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": {}, "colab_type": "code", "executionInfo": { "elapsed": 226, "status": "ok", "timestamp": 1600236244243, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "6fHCZ5KlekZB" }, "outputs": [], "source": [ "X_train_CIFAR = np.reshape(X_train_CIFAR, (X_train_CIFAR.shape[0], -1))\n", "X_val_CIFAR = np.reshape(X_val_CIFAR, (X_val_CIFAR.shape[0], -1))\n", "X_test_CIFAR = np.reshape(X_test_CIFAR, (X_test_CIFAR.shape[0], -1))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 109 }, "colab_type": "code", "executionInfo": { "elapsed": 252, "status": "ok", "timestamp": 1600236246443, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "QZysJ--XNSch", "outputId": "1fa4987c-592c-44d3-b867-d9db18009cd1" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(40000, 3072)\n", "(40000,)\n", "(10000, 3072)\n", "(10000, 3072)\n", "10\n" ] } ], "source": [ "print(X_train_CIFAR.shape) # each image is a single col. vector\n", "print(y_train_CIFAR.shape)\n", "print(X_val_CIFAR.shape)\n", "print(X_test_CIFAR.shape)\n", "print(n_class_CIFAR)\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "dHAh3nYoekZH" }, "source": [ "# Loading Mushroom" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JCtta_lGekZI" }, "source": [ "In the following cells we determine the splitting of the mushroom dataset.\n", "<br /> TRAINING + VALIDATION = 0.8, TESTING = 0.2" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": {}, "colab_type": "code", "executionInfo": { "elapsed": 362, "status": "ok", "timestamp": 1600236252657, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "mdFNpml5ekZK" }, "outputs": [], "source": [ "# TRAINING = 0.6 indicates 60% of the data is used as the training dataset.\n", "VALIDATION = 0.2" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 72 }, "colab_type": "code", "executionInfo": { "elapsed": 1312, "status": "ok", "timestamp": 1600236255536, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "2KYZDviLekZR", "outputId": "b30dcc2f-d4bf-4681-9190-23cf4825580f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n", "Number of train samples: 4874\n", "Number of val samples: 1625\n", "Number of test samples: 1625\n" ] } ], "source": [ "data = get_MUSHROOM_data(VALIDATION)\n", "X_train_MR, y_train_MR = data['X_train'], data['y_train']\n", "X_val_MR, y_val_MR = data['X_val'], data['y_val']\n", "X_test_MR, y_test_MR = data['X_test'], data['y_test']\n", "n_class_MR = len(np.unique(y_test_MR))\n", "print(n_class_MR)\n", "#Since the perceptron is based on y={-1,1} formulation in the slides.\n", "y_train_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_train_MR])\n", "y_val_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_val_MR])\n", "y_test_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_test_MR])\n", "\n", "print(\"Number of train samples: \", X_train_MR.shape[0])\n", "print(\"Number of val samples: \", X_val_MR.shape[0])\n", "print(\"Number of test samples: \", 
X_test_MR.shape[0])\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "jNsJK5F1ekZW" }, "source": [ "### Get Accuracy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "qIlzQ75TekZX" }, "source": [ "This function computes how well your model performs using accuracy as a metric." ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "colab": {}, "colab_type": "code", "executionInfo": { "elapsed": 336, "status": "ok", "timestamp": 1600236259069, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "MGTRZvMuekZX" }, "outputs": [], "source": [ "def get_acc(pred, y_test):\n", " if len(y_test) != len(pred):\n", " print(\"Lengths do not match\", len(y_test), len(pred))\n", " return np.sum(y_test == pred) / len(y_test) * 100" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 90 }, "colab_type": "code", "executionInfo": { "elapsed": 906, "status": "ok", "timestamp": 1600231267629, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "6QnldZWYPHso", "outputId": "feebf554-7b0e-4529-f9a6-04d78b49bcaf" }, "outputs": [], "source": [ "# !cp models/Perceptron.py .\n", "# !python3 Perceptron.py\n", "mean_image = np.mean(X_train_CIFAR, axis=0)\n", "X_train_CIFAR -= mean_image\n", "X_val_CIFAR -= mean_image\n", "X_test_CIFAR -= mean_image\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Y8QCLuxnekZd" }, "source": [ "# Perceptron" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "v3YOa0NSekZf" }, "source": [ "The perceptron has 2 hyperparameters that you can experiment with:\n", "- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it to a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate after each epoch.\n", "- **Number of Epochs** - An epoch is a complete pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according to the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Ky0y-0HRekZf" }, "source": [ "You will implement the Perceptron classifier in **models/Perceptron.py**.\n", "\n", "The following code:\n", "- Creates an instance of the Perceptron classifier class\n", "- Calls the train function of the Perceptron class on the training data\n", "- Uses the predict function to find the training accuracy as well as the testing accuracy\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "VsxAA2pKekZh" }, "source": [ "## Train Perceptron on CIFAR" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 54 }, "colab_type": "code", "id": "NMLb3HHFekZi", "outputId": "bd62bc9d-88c9-4790-d332-6e3517a7e679" }, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. 
Optimal output in the cell below\n", " ##################################\n", "\n", "n_epochs=[10,100,500]\n", "lr = [0.0005,0.005,0.05,0.5,1,1.5]\n", "# n_epochs=[100]\n", "# lr=[0.5]\n", "for ll in lr:\n", " for epo in n_epochs:\n", " percept_CIFAR = Perceptron(n_class_CIFAR, ll, epo)\n", " percept_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", " pred_percept = percept_CIFAR.predict(X_train_CIFAR)\n", " print('Training accuracy for LR= %f and Epochs= %f is given by: %f' % (ll,epo, get_acc(pred_percept, y_train_CIFAR)))\n", " pred_percept = percept_CIFAR.predict(X_val_CIFAR)\n", " #print(pred_percept)\n", " print('The validation accuracy for LR= %f and Epochs= %f is given by: %f' % (ll,epo,get_acc(pred_percept, y_val_CIFAR)))\n", " pred_percept = percept_CIFAR.predict(X_test_CIFAR)\n", " print('The testing accuracy for LR= %f and Epochs= %f is given by: %f' % (ll,epo,get_acc(pred_percept, y_test_CIFAR)))\n", "\n" ] }, { "cell_type": "code", "execution_count": 202, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "executionInfo": { "elapsed": 355, "status": "ok", "timestamp": 1600231358866, "user": { "displayName": "Punit Jha", "photoUrl": "", "userId": "07885534541681120711" }, "user_tz": 300 }, "id": "fsO_1xKXekZn", "outputId": "f327ca31-08af-4544-b0fb-950244f1c152" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 29.572500\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "n_epochs=100\n", "lr = 0.0005\n", "percept_CIFAR = Perceptron(n_class_CIFAR, lr, n_epochs)\n", "percept_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", "pred_percept = percept_CIFAR.predict(X_train_CIFAR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Fl9dVFkyekZs" }, "source": [ "### Validate Perceptron on CIFAR" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": {}, "colab_type": "code", "id": "pau9aDtvekZs" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 29.240000\n" ] } ], "source": [ "pred_percept = percept_CIFAR.predict(X_val_CIFAR)\n", "#print(pred_percept)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "8q2_V0AGekZy" }, "source": [ "### Test Perceptron on CIFAR" ] }, { "cell_type": "code", "execution_count": 199, "metadata": { "colab": {}, "colab_type": "code", "id": "g6n24w3RekZz" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 29.440000\n" ] } ], "source": [ "pred_percept = percept_CIFAR.predict(X_test_CIFAR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "QdeMRZSvekZ4" }, "source": [ "### Perceptron_CIFAR Kaggle Submission\n", "\n", "Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. 
Use the following code to do so:" ] }, { "cell_type": "code", "execution_count": 200, "metadata": { "colab": {}, "colab_type": "code", "id": "15wzgCZAekZ4" }, "outputs": [], "source": [ "output_submission_csv('kaggle/perceptron_submission_CIFAR.csv', percept_CIFAR.predict(X_test_CIFAR))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "0SmZK6amekZ_" }, "source": [ "## Train Perceptron on Mushroom" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "gYT5PAfTekaA" }, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. Optimal output in the cell below\n", " ##################################\n", "lr = [0.0005,0.005,0.05,0.5,1,1.5]\n", "n_epoc=[10,100,500,1000]\n", "\n", "for ll in lr:\n", " for epo in n_epoc:\n", " percept_MR = Perceptron(n_class_MR, ll, epo)\n", " percept_MR.train(X_train_MR, y_train_MR)\n", " pred_percept = percept_MR.predict(X_train_MR)\n", " print('Training accuracy for LR=%f and Epochs=%f is given by: %f' % (ll,epo,get_acc(pred_percept, y_train_MR)))\n", " pred_percept = percept_MR.predict(X_val_MR)\n", " print('Validation accuracy for LR=%f and Epochs=%f is given by: %f' % (ll,epo,get_acc(pred_percept, y_val_MR)))\n", " pred_percept = percept_MR.predict(X_test_MR)\n", " print('Testing accuracy for LR=%f and Epochs=%f is given by: %f' % (ll,epo,get_acc(pred_percept, y_test_MR)))\n", " " ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "colab": {}, "colab_type": "code", "id": "0Ki9gPTPekaF" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 93.229380\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "lr = 0.000500\n", "n_epoc=100\n", "percept_MR = Perceptron(n_class_MR, lr, n_epoc)\n", "percept_MR.train(X_train_MR, y_train_MR)\n", "pred_percept = percept_MR.predict(X_train_MR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "E1rAuE0yekaI" }, "source": [ "### Validate Perceptron on Mushroom" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "colab": {}, "colab_type": "code", "id": "w7cjqgtKekaJ" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 92.307692\n" ] } ], "source": [ "pred_percept = percept_MR.predict(X_val_MR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Q8OyUgWzekaP" }, "source": [ "### Test Perceptron on Mushroom" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "colab": {}, "colab_type": "code", "id": "DShO9wn0ekaP" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 92.676923\n" ] } ], "source": [ "pred_percept = percept_MR.predict(X_test_MR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "qL1K-VsGekaS" }, "source": [ "# Support Vector Machines (with SGD)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JR2q718JekaT" }, "source": [ "Next, you will implement a \"soft margin\" SVM. 
In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.\n", "\n", "We will optimize the SVM loss using SGD. This means you must compute the gradient of the loss function with respect to the model weights. You will use this gradient to update the model weights.\n", "\n", "The SVM optimized with SGD has 3 hyperparameters that you can experiment with:\n", "- **Learning rate** - as defined above for the perceptron, this parameter scales how much the weights are changed by the calculated gradient update. \n", "- **Epochs** - as defined above for the perceptron.\n", "- **Regularization constant** - A hyperparameter that determines the strength of regularization. In this case it is the coefficient on the margin-maximization term. You could try different values. The default value is set to 0.05." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KJ9uwxvmekaU" }, "source": [ "You will implement the SVM trained with SGD in **models/SVM.py**.\n", "\n", "The following code:\n", "- Creates an instance of the SVM classifier class\n", "- Calls the train function of the SVM class on the training data\n", "- Uses the predict function to find the training accuracy as well as the testing accuracy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "1yAeKkLvekaU" }, "source": [ "## Train SVM on CIFAR" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "iDE7rE7aekaX" }, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. Optimal output in the cell below\n", " ##################################\n", "\n", "reg_const = [0.0001,0.005,0.05,0.5,1.0,10]\n", "lr = [0.0005,0.005,0.05,0.5,1,1.5]\n", "n_epoc=[1,5,10,15,20]\n", "\n", "for ll in lr:\n", " for epo in n_epoc:\n", " for regg in reg_const:\n", " svm_CIFAR = SVM(n_class_CIFAR, ll, epo, regg)\n", " svm_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", " pred_svm = svm_CIFAR.predict(X_train_CIFAR)\n", " print('The training accuracy for LR=%f, epochs=%f, and RG=%f is given by: %f' % (ll,epo,regg,get_acc(pred_svm, y_train_CIFAR)))\n", "\n" ] }, { "cell_type": "code", "execution_count": 297, "metadata": { "colab": {}, "colab_type": "code", "id": "ffkatyFYekaa" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 36.727500\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "lr = 0.005\n", "n_epochs =15\n", "reg_const = 0.000500 #0.55\n", "svm_CIFAR = SVM(n_class_CIFAR, lr, n_epochs, reg_const)\n", "svm_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", "pred_svm = svm_CIFAR.predict(X_train_CIFAR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "iyJ9izIiekag" }, "source": [ "### Validate SVM on CIFAR" ] }, { "cell_type": "code", "execution_count": 298, "metadata": { "colab": {}, "colab_type": "code", "id": "3AOLnOrDekah" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 32.920000\n" ] } ], "source": [ "pred_svm = svm_CIFAR.predict(X_val_CIFAR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": 
"B3r-s4QKekaj" }, "source": [ "### Test SVM on CIFAR" ] }, { "cell_type": "code", "execution_count": 299, "metadata": { "colab": {}, "colab_type": "code", "id": "UL5D-RGLekal" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 32.830000\n" ] } ], "source": [ "pred_svm = svm_CIFAR.predict(X_test_CIFAR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "PTp5tNTdekao" }, "source": [ "### SVM_CIFAR Kaggle Submission\n", "\n", "Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. Use the following code to do so:" ] }, { "cell_type": "code", "execution_count": 300, "metadata": { "colab": {}, "colab_type": "code", "id": "cW7haWVTekao" }, "outputs": [], "source": [ "output_submission_csv('kaggle/svm_submission_CIFAR.csv', svm_CIFAR.predict(X_test_CIFAR))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "yE2ji3UJekat" }, "source": [ "## Train SVM on Mushroom" ] }, { "cell_type": "code", "execution_count": 206, "metadata": { "colab": {}, "colab_type": "code", "id": "aCuDmVi4ekav" }, "outputs": [], "source": [ "#Since the SVM is based on y={0,1} formulation in the slides.\n", "y_train_MR = np.asarray([0 if each_element == -1 else 1 for each_element in y_train_MR])\n", "y_val_MR = np.asarray([0 if each_element == -1 else 1 for each_element in y_val_MR])\n", "y_test_MR = np.asarray([0 if each_element == -1 else 1 for each_element in y_test_MR])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "X8gSELqjekbS" }, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. 
Optimal output in the cell below\n", " ##################################\n", "\n", "reg_const = [0.005,0.05,0.5,1.0,10]\n", "lr = [0.0005,0.005,0.05,1.5]\n", "n_epoc=[1,5,10,15]\n", "\n", "for ll in lr:\n", " for epo in n_epoc:\n", " for regg in reg_const:\n", " svm_MR = SVM(n_class_MR, ll, epo, regg)\n", " svm_MR.train(X_train_MR, y_train_MR)\n", " pred_svm = svm_MR.predict(X_train_MR)\n", " print('Training accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo,regg,get_acc(pred_svm, y_train_MR)))\n", " pred_svm = svm_MR.predict(X_val_MR)\n", " \n", " print('Validation accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo,regg,get_acc(pred_svm, y_val_MR)))\n", " pred_svm = svm_MR.predict(X_test_MR)\n", " print('Testing accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo,regg,get_acc(pred_svm, y_test_MR)))\n" ] }, { "cell_type": "code", "execution_count": 294, "metadata": { "colab": {}, "colab_type": "code", "id": "DFs3Na5Sekay" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 91.731637\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "reg_const = 0.005\n", "lr =0.05\n", "n_epoc=15\n", "svm_MR = SVM(n_class_MR, lr, n_epoc, reg_const)\n", "svm_MR.train(X_train_MR, y_train_MR)\n", "pred_svm = svm_MR.predict(X_train_MR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "dVHm88vfeka1" }, "source": [ "### Validate SVM on Mushroom" ] }, { "cell_type": "code", "execution_count": 295, "metadata": { "colab": {}, "colab_type": "code", "id": "AVne4NvNeka2" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 91.569231\n" ] } ], "source": [ "pred_svm = svm_MR.predict(X_val_MR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "V2pYbMvLeka6" }, "source": [ "### Test SVM on Mushroom" ] }, { "cell_type": "code", "execution_count": 296, "metadata": { "colab": {}, "colab_type": "code", "id": "5Rctxdmfeka6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 91.200000\n" ] } ], "source": [ "pred_svm = svm_MR.predict(X_test_MR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Xlh_koVWeka8" }, "source": [ "# Softmax Classifier (with SGD)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "collapsed": true, "id": "Ah2WnFfkeka9" }, "source": [ "Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this softmax output to train the model. \n", "\n", "Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax\n", "\n", "Once again we will train the classifier with SGD. 
This means you need to compute the gradients of the softmax cross-entropy loss function with respect to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nXDrw_2Neka-" }, "source": [ "The softmax classifier has 3 hyperparameters that you can experiment with:\n", "- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.\n", "- **Number of Epochs** - As described for the perceptron.\n", "- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective." ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "CnMp9JEWeka_" }, "source": [ "You will implement a softmax classifier using SGD in **models/Softmax.py**.\n", "\n", "The following code:\n", "- Creates an instance of the Softmax classifier class\n", "- Calls the train function of the Softmax class on the training data\n", "- Uses the predict function to find the training accuracy as well as the testing accuracy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "umaKawiTekbA" }, "source": [ "## Train Softmax on CIFAR" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "6uvRZEIyekbB" }, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. Optimal output in the cell below\n", " ##################################\n", "reg_const = [0.005,0.05,0.5,1.0,10]\n", "lr = [0.0005,0.005,0.05,1.5]\n", "n_epoc=[1,5,10,15]\n", "\n", "for ll in lr:\n", " for epo in n_epoc:\n", " for regg in reg_const:\n", "\n", " softmax_CIFAR = Softmax(n_class_CIFAR, ll, epo, regg)\n", " softmax_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", " pred_softmax = softmax_CIFAR.predict(X_train_CIFAR)\n", " print('Training accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo, regg,get_acc(pred_softmax, y_train_CIFAR)))\n", " pred_softmax = softmax_CIFAR.predict(X_val_CIFAR)\n", "\n", " print('Validation accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo, regg,get_acc(pred_softmax, y_val_CIFAR)))\n", " pred_softmax = softmax_CIFAR.predict(X_test_CIFAR)\n", " print('Testing accuracy with LR= %f, Epochs=%f and RConst=%f is given by: %f' % (ll, epo, regg,get_acc(pred_softmax, y_test_CIFAR)))" ] }, { "cell_type": "code", "execution_count": 223, "metadata": { "colab": {}, "colab_type": "code", "id": "LnTWl3tWekbE" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 34.450000\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "reg_const = 0.0005\n", "lr = 0.005\n", "n_epoc=15\n", "softmax_CIFAR = Softmax(n_class_CIFAR, lr, n_epoc, reg_const)\n", "softmax_CIFAR.train(X_train_CIFAR, y_train_CIFAR)\n", "pred_softmax = softmax_CIFAR.predict(X_train_CIFAR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "lKPRrcv8ekbF" }, "source": [ "### Validate Softmax on CIFAR" ] }, { "cell_type": "code", 
"execution_count": 224, "metadata": { "colab": {}, "colab_type": "code", "id": "kd6EQV5NekbG" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 30.580000\n" ] } ], "source": [ "pred_softmax = softmax_CIFAR.predict(X_val_CIFAR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "uvKar330ekbK" }, "source": [ "### Testing Softmax on CIFAR" ] }, { "cell_type": "code", "execution_count": 225, "metadata": { "colab": {}, "colab_type": "code", "id": "F6x09wISekbK" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 30.700000\n" ] } ], "source": [ "pred_softmax = softmax_CIFAR.predict(X_test_CIFAR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_CIFAR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Uvlrj_LWekbO" }, "source": [ "### Softmax_CIFAR Kaggle Submission\n", "\n", "Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. Use the following code to do so:" ] }, { "cell_type": "code", "execution_count": 226, "metadata": { "colab": {}, "colab_type": "code", "id": "Ha_oLe1CekbO" }, "outputs": [], "source": [ "output_submission_csv('kaggle/softmax_submission_CIFAR.csv', softmax_CIFAR.predict(X_test_CIFAR))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "dES7qQC5ekbS" }, "source": [ "## Train Softmax on Mushroom" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. 
Optimal output in the cell below\n", " ##################################\n", "reg_const = [0.005,0.05,0.5,1.0,10]\n", "lr = [0.0005,0.005,0.05,1.5]\n", "n_epoc=[1,5,10,15]\n", "# lr = 0.005\n", "# n_epochs =2000\n", "# reg_const = 0.001\n", "\n", "for ll in lr:\n", " for regg in reg_const:\n", " for epo in n_epoc:\n", " softmax_MR = Softmax(n_class_MR, ll, epo, regg)\n", " softmax_MR.train(X_train_MR, y_train_MR)\n", " pred_softmax = softmax_MR.predict(X_train_MR)\n", "\n", " print('Training accuracy for LR=%f, Epochs=%f and RConst=%f is given by: %f' % (ll,epo,regg,get_acc(pred_softmax, y_train_MR)))\n", " pred_softmax = softmax_MR.predict(X_val_MR)\n", " print('Validation accuracy for LR=%f, Epochs=%f and RConst=%f is given by: %f' % (ll,epo,regg,get_acc(pred_softmax, y_val_MR)))\n", " pred_softmax = softmax_MR.predict(X_test_MR)\n", " print('Testing accuracy for LR=%f, Epochs=%f and RConst=%f is given by: %f' % (ll,epo,regg,get_acc(pred_softmax, y_test_MR)))\n" ] }, { "cell_type": "code", "execution_count": 287, "metadata": { "colab": {}, "colab_type": "code", "id": "_2iBFk6XekbU" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 90.828888\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "reg_const = 0.0005\n", "lr = 0.05\n", "n_epoc=15\n", "softmax_MR = Softmax(n_class_MR, lr, n_epoc, reg_const)\n", "softmax_MR.train(X_train_MR, y_train_MR)\n", "pred_softmax = softmax_MR.predict(X_train_MR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_MR)))\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "L1GjgVeyekbX" }, "source": [ "### Validate Softmax on Mushroom" ] }, { "cell_type": "code", "execution_count": 288, "metadata": { "colab": {}, "colab_type": "code", "id": "z9jmpoVXekbX" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 89.230769\n" ] } ], "source": [ "pred_softmax = softmax_MR.predict(X_val_MR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "qhUt9JtLekba" }, "source": [ "### Testing Softmax on Mushroom" ] }, { "cell_type": "code", "execution_count": 301, "metadata": { "colab": {}, "colab_type": "code", "id": "f_gIgzClekba" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 88.246154\n" ] } ], "source": [ "pred_softmax = softmax_MR.predict(X_test_MR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "0tdUus0Zekbc" }, "source": [ "# Logistic Classifier" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "We2MuYNtekbd" }, "source": [ "The Logistic Classifier has 2 hyperparameters that you can experiment with:\n", "- **Learning rate** - as defined above for the perceptron, this parameter scales how much the weights are changed by the calculated gradient update. 
\n", "- **Number of Epochs** - As described for perceptron.\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "wXxaiwV5ekbe" }, "source": [ "You will implement the Logistic Classifier in the **models/Logistic.py**\n", "\n", "The following code: \n", "- Creates an instance of the Logistic classifier class \n", "- The train function of the Logistic class is trained on the training data\n", "- We use the predict function to find the training accuracy as well as the testing accuracy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "pmuNo9Ieekbe" }, "source": [ "### Training Logistic Classifer" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "colab": {}, "colab_type": "code", "id": "KQkhDEenekbe" }, "outputs": [], "source": [ "#Since the logistic regression is based on y={-1,1} formulation in the slides.\n", "y_train_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_train_MR])\n", "y_val_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_val_MR])\n", "y_test_MR = np.asarray([-1 if each_element == 0 else 1 for each_element in y_test_MR])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ " ##################################\n", " #The output was cleared. Optimal output in the cell below\n", " ##################################\n", "\n", "lr = [0.0005,0.005,0.05,0.5,1,1.5]\n", "n_epoc=[10,100,500,1000]\n", "\n", "for ll in lr:\n", " for epo in n_epoc:\n", " lr = Logistic(ll, epo)\n", " lr.train(X_train_MR, y_train_MR)\n", " pred_lr = lr.predict(X_train_MR)\n", " print('Training accuracy with LR= %f and epo= %f is given by: %f' % (ll,epo,get_acc(pred_lr, y_train_MR)))\n", " pred_lr = lr.predict(X_val_MR)\n", " print('Validation accuracy with LR= %f and epo= %f is given by: %f' % (ll,epo,get_acc(pred_lr, y_val_MR)))\n", " pred_lr = lr.predict(X_test_MR)\n", " print('Testing accuracy with LR= %f and epo= %f is given by: %f' % (ll,epo,get_acc(pred_lr, y_test_MR)))" ] }, { "cell_type": "code", "execution_count": 135, "metadata": { "colab": {}, "colab_type": "code", "id": "Hv3RfFL0ekbi" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The training accuracy is given by: 87.853919\n" ] } ], "source": [ "#Using the optimal value from the above calculations to train the model\n", "lr = 0.05\n", "n_epoc= 10\n", "lr = Logistic(lr, n_epoc)\n", "lr.train(X_train_MR, y_train_MR)\n", "pred_lr = lr.predict(X_train_MR)\n", "print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "18PPUvPgekbk" }, "source": [ "### Validate Logistic Classifer" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "colab": {}, "colab_type": "code", "id": "3pt1E_hxekbm" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The validation accuracy is given by: 93.292308\n" ] } ], "source": [ "pred_lr = lr.predict(X_val_MR)\n", "print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_MR)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "A4ICfqI6ekbn" }, "source": [ "### Test Logistic Classifier" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "colab": {}, "colab_type": "code", "id": "QG_oWzx5ekbo" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The testing accuracy is given by: 93.292308\n" ] } ], "source": [ "pred_lr = 
lr.predict(X_test_MR)\n", "print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_MR)))" ] } ], "metadata": { "colab": { "name": "CS 498DL Assignment-1.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 1 }