# Linear-Classifiers / models / Perceptron.py
"""Perceptron model."""

import numpy as np
import random

class Perceptron:
    """Linear perceptron classifier that keeps one weight row per class.

    Label convention:
        * ``n_class == 2``: labels are the signed pair -1 / +1. Weight row 0
          stands for -1 and weight row 1 stands for +1.
        * otherwise: labels are integer class indices, each of which selects
          one row of the weight matrix.
    """

    # Step-size schedule used by train(): during the final quarter of the
    # epochs, a step size that is still above 1 shrinks by _LR_DECAY_STEP on
    # every misclassified example.
    _LR_DECAY_START = 0.75
    _LR_DECAY_STEP = 0.001

    def __init__(self, n_class: int, lr: float, epochs: int):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
        """
        self.w = None  # (n_class, D) weight matrix; allocated by train()
        self.lr = lr
        self.epochs = epochs
        self.n_class = n_class

    def train(self, X_train: np.ndarray, y_train: np.ndarray):
        """Train the classifier with the mistake-driven perceptron rule.

        The weights are re-initialised with uniform random values in [0, 1)
        on every call. Each example is scored against every weight row and
        the highest-scoring row is the prediction. Only misclassified
        examples change the weights:

            * binary: ``w[1] += lr * y * x``. The decision depends only on
              the difference between the two rows, so row 0 is left alone.
            * multi-class: the true class row moves towards the example
              (``+= lr * x``) and the wrongly predicted row moves away from
              it (``-= lr * x``).

        ``self.lr`` is never modified; the step-size decay acts on a local
        copy so that repeated calls start from the configured learning rate.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels

        Raises:
            ValueError: if X_train is not 2-dimensional or the number of
                labels differs from the number of examples.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.ndim != 2:
            raise ValueError("X_train must have shape (N, D)")
        if X_train.shape[:1] != y_train.shape[:1]:
            raise ValueError(
                "X_train and y_train must contain the same number of examples"
            )

        # shape[1] (rather than len(X_train[0])) also works when N == 0.
        self.w = np.random.random_sample((self.n_class, X_train.shape[1]))

        binary = self.n_class == 2
        step = self.lr  # local copy: the hyper-parameter itself stays fixed
        decay_from = self._LR_DECAY_START * self.epochs

        for epoch in range(self.epochs):
            decaying = epoch >= decay_from
            for x, y in zip(X_train, y_train):
                row = int(np.argmax(self.w @ x))
                # In binary mode row 0 means label -1 and row 1 means +1.
                predicted = (1 if row else -1) if binary else row
                if predicted == y:
                    continue

                if decaying and step > 1:
                    step -= self._LR_DECAY_STEP

                if binary:
                    self.w[1] += step * y * x
                else:
                    # The correction must not be scaled by the label value:
                    # doing so would freeze class 0 and over-step the others.
                    self.w[y] += step * x
                    self.w[row] -= step * x

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class (-1 or +1 when ``n_class == 2``).

        Raises:
            RuntimeError: if called before train().
        """
        if self.w is None:
            raise RuntimeError("Perceptron.predict() called before train()")

        X_test = np.asarray(X_test)
        if X_test.size == 0:
            return np.empty(0, dtype=int)

        # Score all examples at once: (N, D) @ (D, n_class) -> (N, n_class).
        rows = np.argmax(X_test @ self.w.T, axis=1)
        if self.n_class == 2:
            return np.where(rows == 0, -1, 1)
        return rows

#if __name__ == '__main__':
  # checking=Perceptron(10,0.5,2)
  # x_train=np.random.rand(400, 30)
  # print(len(x_train[1]))
  # y_train=np.random.randint(10, size=(400))
  # y_predict=np.random.rand(400)
  
  # checking.train(x_train, y_train)
  # print(len([(0.5*(y_train[10]-y_predict[10])*10.0)]*(checking.w.shape[1])))
  # print(checking.w.shape)