models/Softmax.py · Linear-Classifiers

"""Softmax model."""

import numpy as np


class Softmax:
    """Multiclass linear classifier trained with the softmax (cross-entropy) loss.

    The model is a single weight matrix ``w`` of shape (D, n_class) with no
    bias term. Class scores are ``X.dot(w)`` and the predicted class is the
    arg-max score. Training uses mini-batch SGD with L2 regularization.
    """

    def __init__(self, n_class: int, lr: float, epochs: int, reg_const: float):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
            reg_const: the regularization constant
        """
        # Weight matrix of shape (D, n_class). It is allocated by train(),
        # because the input dimension D is only known once data is seen.
        self.w = None
        self.lr = lr
        self.epochs = epochs
        self.reg_const = reg_const
        self.n_class = n_class

    def calc_gradient(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
        """Calculate gradient of the softmax loss.

        Inputs have dimension D, there are C classes, and we operate on
        mini-batches of N examples. The returned gradient is the batch mean
        of the per-example cross-entropy gradient plus the regularization
        term ``reg_const * w``.

        Parameters:
            X_train: a numpy array of shape (N, D) containing a mini-batch
                of data
            y_train: a numpy array of shape (N,) containing training labels;
                y[i] = c means that X[i] has label c, where 0 <= c < C

        Returns:
            gradient with respect to weights w; an array of same shape as w
        """
        n_samples = X_train.shape[0]
        reg_term = self.reg_const * self.w
        if n_samples == 0:
            # An empty batch contributes no data term; returning early avoids
            # the 0/0 division that would fill the gradient with NaN.
            return reg_term

        scores = X_train.dot(self.w)
        # Shift every row by its max so that exp() cannot overflow; the
        # softmax is invariant to a per-row constant shift.
        scores = scores - scores.max(axis=1, keepdims=True)
        probs = np.exp(scores)
        probs /= probs.sum(axis=1, keepdims=True)

        # d(loss_i)/d(scores_i) = softmax(scores_i) - one_hot(y_i).
        # Labels are matched by equality so that float-typed labels work and
        # a label outside [0, C) simply matches no column.
        is_true_class = np.asarray(y_train)[:, None] == np.arange(self.w.shape[1])
        probs[is_true_class] -= 1.0

        return X_train.T.dot(probs) / n_samples + reg_term

    def train(self, X_train: np.ndarray, y_train: np.ndarray, batch_size: int = 200):
        """Train the classifier with mini-batch SGD.

        Weights are re-initialized with small Gaussian noise on every call.
        Each epoch performs ``N // batch_size`` updates; every update draws
        its mini-batch uniformly at random *with replacement* from the whole
        training set. If the training set is smaller than ``batch_size`` the
        batch size is reduced to N so that one update per epoch still runs.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
            batch_size: maximum number of examples per mini-batch

        Raises:
            ValueError: if batch_size is not positive
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        n_samples, n_features = X_train.shape

        self.w = np.random.randn(n_features, self.n_class) * 0.01
        if n_samples == 0:
            # Nothing to learn from; leave the freshly initialized weights.
            return

        # Clamp so that small datasets still get at least one step per epoch.
        batch_size = min(batch_size, n_samples)
        batches_per_epoch = n_samples // batch_size

        for _ in range(self.epochs):
            for _ in range(batches_per_epoch):
                indices = np.random.choice(n_samples, batch_size)
                gradient = self.calc_gradient(X_train[indices], y_train[indices])
                self.w -= self.lr * gradient

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        train() must have been called first so that the weights exist.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        return np.argmax(np.dot(X_test, self.w), axis=1)