"""Support Vector Machine (SVM) model."""

import numpy as np


class SVM:
    def __init__(self, n_class: int, lr: float, epochs: int, reg_const: float):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
            reg_const: the regularization constant
        """
        self.w = None  # weight matrix of shape (D, C); initialized in train()
        self.alpha = lr
        self.epochs = epochs
        self.reg_const = reg_const
        self.n_class = n_class

    def calc_gradient(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
        """Calculate gradient of the svm hinge loss.

        Inputs have dimension D, there are C classes, and we operate on
        mini-batches of N examples.

        Parameters:
            X_train: a numpy array of shape (N, D) containing a mini-batch
                of data
            y_train: a numpy array of shape (N,) containing training labels;
                y[i] = c means that X[i] has label c, where 0 <= c < C

        Returns:
            the gradient with respect to weights w; an array of the same shape
                as w
        """
        # Accumulate the multiclass hinge-loss gradient one example at a time.
        # For example i with scores s = w^T x_i, every class c != y_i with
        # s_c - s_{y_i} + 1 > 0 contributes +x_i to column c and -x_i to
        # column y_i.
        grad = np.zeros(self.w.shape)
        for i in range(X_train.shape[0]):
            scores = X_train[i].dot(self.w)
            correct_score = scores[y_train[i]]
            for c in range(self.n_class):
                if c == y_train[i]:
                    continue
                margin = scores[c] - correct_score + 1
                if margin > 0:
                    grad[:, c] += X_train[i]
                    grad[:, y_train[i]] -= X_train[i]

        # Average over the batch and add the L2 regularization gradient.
        grad /= X_train.shape[0]
        grad += self.reg_const * self.w

        return grad
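
    def _calc_gradient_vectorized(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
        """Vectorized sketch of the same hinge-loss gradient.

        Illustrative only (not part of the original skeleton): it assumes
        self.w has shape (D, C) and should agree with calc_gradient up to
        floating-point error.
        """
        N = X_train.shape[0]
        scores = X_train.dot(self.w)                      # (N, C) class scores
        correct = scores[np.arange(N), y_train][:, None]  # (N, 1) correct-class scores
        margins = np.maximum(0, scores - correct + 1)     # hinge margins with delta = 1
        margins[np.arange(N), y_train] = 0                # ignore the correct class
        # Each violated margin adds +x_i to its class column and -x_i to the
        # correct-class column.
        mask = (margins > 0).astype(X_train.dtype)
        mask[np.arange(N), y_train] = -mask.sum(axis=1)
        grad = X_train.T.dot(mask) / N
        grad += self.reg_const * self.w
        return grad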

    def train(self, X_train: np.ndarray, y_train: np.ndarray):
        """Train the classifier.

        Hint: operate on mini-batches of data for SGD.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        # Small random initialization of the weight matrix (D, C). No explicit
        # bias term is used here; append a constant feature to X beforehand if
        # one is needed.
        self.w = 0.001 * np.random.randn(X_train.shape[1], self.n_class)

        # Mini-batch SGD: repeatedly sample a random batch, compute the
        # hinge-loss gradient, and step against it.
        batch_size = 200
        num_batches = max(X_train.shape[0] // batch_size, 1)
        for _ in range(self.epochs):
            for _ in range(num_batches):
                indices = np.random.choice(X_train.shape[0], batch_size)
                X_batch = X_train[indices]
                y_batch = y_train[indices]
                gradient = self.calc_gradient(X_batch, y_batch)
                self.w -= self.alpha * gradient

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        # Each row of X_test.dot(self.w) holds the class scores for one
        # example; predict the class with the highest score.
        return np.argmax(X_test.dot(self.w), axis=1)
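

# Minimal usage sketch (assumption: not part of the original assignment files).
# Fits the classifier on a small synthetic two-class problem; the data and
# hyperparameters below are illustrative, not tuned.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = np.vstack([
        rng.normal(loc=-1.0, size=(150, 5)),  # class 0 cluster
        rng.normal(loc=+1.0, size=(150, 5)),  # class 1 cluster
    ])
    y = np.array([0] * 150 + [1] * 150)

    clf = SVM(n_class=2, lr=0.01, epochs=20, reg_const=0.05)
    clf.train(X, y)
    acc = np.mean(clf.predict(X) == y)
    print(f"training accuracy: {acc:.3f}")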