"""Softmax model.""" import numpy as np class Softmax: def __init__(self, n_class: int, lr: float, epochs: int, reg_const: float): """Initialize a new classifier. Parameters: n_class: the number of classes lr: the learning rate epochs: the number of epochs to train for reg_const: the regularization constant """ self.w = None # TODO: change this self.lr = lr self.epochs = epochs self.reg_const = reg_const self.n_class = n_class def calc_gradient(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray: """Calculate gradient of the softmax loss. Inputs have dimension D, there are C classes, and we operate on mini-batches of N examples. Parameters: X_train: a numpy array of shape (N, D) containing a mini-batch of data y_train: a numpy array of shape (N,) containing training labels; y[i] = c means that X[i] has label c, where 0 <= c < C Returns: gradient with respect to weights w; an array of same shape as w """ grad=np.zeros((self.w.shape)) for i in range(X_train.shape[0]): scores=X_train[i].dot(self.w) sum_exp_scores=np.sum(np.exp(scores-np.max(scores))) for markers in range(self.w.shape[1]): if markers == y_train[i]: grad[:,markers] +=((np.exp(scores[markers]-np.max(scores)) /sum_exp_scores )-1)*X_train[i] else: grad[:,markers] +=(np.exp(scores[markers]-np.max(scores)) /sum_exp_scores)*X_train[i] grad=grad/X_train.shape[0] grad=grad+self.reg_const*self.w return grad def train(self, X_train: np.ndarray, y_train: np.ndarray): """Train the classifier. Hint: operate on mini-batches of data for SGD. Parameters: X_train: a numpy array of shape (N, D) containing training data; N examples with D dimensions y_train: a numpy array of shape (N,) containing training labels """ self.w = np.random.randn(X_train.shape[1],self.n_class)*0.01 #self.w=np.zeros((X_train.shape[1],self.n_class)) #self.w=np.random.random_sample((X_train.shape[1],self.n_class)) #notice this is different from perceptron################ batch_size=200 for i in range(self.epochs): for iinepoch in range(int(X_train.shape[0]/batch_size)): indices = np.random.choice(X_train.shape[0], batch_size) X_batch = X_train[indices] y_batch = y_train[indices] gradient = self.calc_gradient(X_batch, y_batch) self.w -=gradient*self.lr def predict(self, X_test: np.ndarray) -> np.ndarray: """Use the trained weights to predict labels for test data points. Parameters: X_test: a numpy array of shape (N, D) containing testing data; N examples with D dimensions Returns: predicted labels for the data in X_test; a 1-dimensional array of length N, where each element is an integer giving the predicted class. """ y_pred = np.zeros(X_test.shape[0]) y_pred = np.argmax(np.dot(X_test, self.w), axis=1) return y_pred