# Subspace-Feature-Representations / Discriminant Analysis (DA) / Two_class_DA.py
import pandas as pd
import numpy as np
from numpy.linalg import matrix_rank
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


def Discriminat_Vector(X,y,n,reg=0.005):
    """Return ``n`` unit-norm discriminant directions for a two-class problem.

    Rows of ``X`` whose label equals 0 form the first class and rows whose
    label equals 1 form the second; rows with any other label are ignored.
    The first direction is ``W^-1 (m1 - m2)`` normalised to unit length, and
    every further direction is built recursively so that it is orthogonal to
    all previous ones (the construction appears to follow the Foley-Sammon
    optimal discriminant vector recursion).

    Args:
        X: 2-D array-like, one sample per row and one feature per column.
        y: Array-like of labels, one per row of ``X`` (a column vector is
            flattened).
        n: Number of directions to return; must be at least 1.
        reg: Ridge term added to the diagonal of the weighted within-class
            covariance before it is inverted. Defaults to 0.005.

    Returns:
        A tuple ``(d_lists, W)`` where ``d_lists`` is a list of ``n`` 1-D
        unit-norm arrays and ``W`` is the regularised within-class matrix.

    Raises:
        ValueError: If ``X`` is not 2-D, ``y`` does not match ``X`` in length,
            ``n`` is smaller than 1, or either class has fewer than two
            samples (its covariance would be undefined).

    Note:
        Only as many mutually orthogonal directions exist as there are
        features; requesting more than ``X.shape[1]`` is not checked here.
    """
    X = np.asarray(X)
    y = np.asarray(y).reshape(-1)
    if X.ndim != 2:
        raise ValueError("X must be 2-D (samples x features), got shape {}".format(X.shape))
    if len(y) != len(X):
        raise ValueError("X has {} rows but y has {} labels".format(len(X), len(y)))
    if n < 1:
        raise ValueError("n must be at least 1, got {}".format(n))

    # Boolean masks replace the per-row Python loop.
    X1 = X[y == 0]
    X2 = X[y == 1]
    N1 = len(X1)
    N2 = len(X2)
    if N1 < 2 or N2 < 2:
        raise ValueError(
            "each class needs at least two samples (label 0: {}, label 1: {})".format(N1, N2)
        )

    m1h = np.mean(X1, axis=0)
    m2h = np.mean(X2, axis=0)
    delta = m1h-m2h    # difference in the estimated means

    # Within-class scatter: weighted sum of the two class covariance matrices.
    # atleast_2d keeps the single-feature case (np.cov returns a 0-d array) a matrix.
    W1 = np.atleast_2d(np.cov(X1.T))
    W2 = np.atleast_2d(np.cov(X2.T))
    # NOTE(review): W1 is weighted by class 2's degrees of freedom. A conventional
    # pooled covariance would use (N1-1) here; kept as in the original - confirm intent.
    c = (N2-1)/(N1+N2-2)

    # The ridge term keeps W invertible when the covariances are rank-deficient.
    W = c*W1 + (1-c)*W2 + reg*np.identity(X.shape[1])
    Wi = np.linalg.inv(W)

    # Only W^-k applied to delta is needed, so use matrix-vector products
    # instead of forming the full matrices W^-2 and W^-3.
    Wi_delta = Wi @ delta          # W^-1 delta
    Wi2_delta = Wi @ Wi_delta      # W^-2 delta
    q2 = delta @ Wi2_delta         # delta^T W^-2 delta

    d1_hat = Wi_delta / np.linalg.norm(Wi_delta)
    d_lists = [d1_hat]

    if n >= 2:
        # b1 is chosen so that d2 is orthogonal to d1.
        b1 = q2 / (delta @ (Wi @ Wi2_delta))
        d2 = Wi_delta - b1 * Wi2_delta
        d_lists.append(d2 / np.linalg.norm(d2))

    # Scale factor of the first direction: d1_hat = alpha1 * W^-1 delta.
    alpha1 = np.sqrt(1/q2)

    for _ in range(3, n+1):
        D = np.array(d_lists)                  # (k, features), one direction per row
        # S[i, j] = d_i^T W^-1 d_j; W^-1 is symmetric, so index order is immaterial.
        S = D @ Wi @ D.T
        rhs = np.zeros(len(d_lists))
        rhs[0] = 1/alpha1
        # Solve S x = rhs directly rather than forming S^-1 explicitly.
        coeffs = np.linalg.solve(S, rhs)
        dn = Wi @ (delta - D.T @ coeffs)
        d_lists.append(dn / np.linalg.norm(dn))

    return d_lists, W
  
  
  
# Neighbour counts swept in every KNN experiment below.
kk = [1,5,10,15]
# Flat accuracy logs, appended in loop order:
#   results_orginal: file index, then k
#   results_DA / results_SVD: file index, then k, then number of kept dimensions (1..10)
results_orginal = []
results_DA = []
results_SVD = []
for i in range(10):
    # Feature representation from ResNet18 (per the original author's note).
    # NOTE: allow_pickle=True will unpickle arbitrary objects; only load trusted files.
    train_data, test_data, train_labels, test_labels = (
        np.load(path.format(i), allow_pickle=True)
        for path in (
            "DATA_paper/File_Pre_Train_Fea_{}.npy",
            "DATA_paper/File_Pre_Test_Fea_{}.npy",
            "DATA_paper/File_Train_label_{}.npy",
            "DATA_paper/File_Test_label_{}.npy",
        )
    )
    print(i)
    for label, arr in (
        ("train_data", train_data),
        ("test_data", test_data),
        ("train_labels", train_labels),
        ("test_labels", test_labels),
    ):
        print(label, arr.shape)
    print("---")

    # Baseline: KNN on the untransformed features, one score per k.
    results_orginal.extend(
        knn(n_neighbors=q, weights='uniform', algorithm='auto')
        .fit(train_data, train_labels)
        .score(test_data, test_labels)
        for q in kk
    )

    # Discriminant-vector projection: ten directions, one column each after projection.
    d_lists, W = Discriminat_Vector(train_data, train_labels, 10)
    d_lists_ = np.array(d_lists)
    da_basis = d_lists_.T
    project_da_train = np.dot(train_data, da_basis)
    project_da_test = np.dot(test_data, da_basis)

    # KNN on the first 1..10 discriminant coordinates for every k.
    results_DA.extend(
        knn(n_neighbors=n_nb, weights='uniform', algorithm='auto')
        .fit(project_da_train[:, :dim], train_labels)
        .score(project_da_test[:, :dim], test_labels)
        for n_nb in kk
        for dim in range(1, 11)
    )

    # SVD projection: first ten left singular vectors of the (features x samples) matrix.
    U2, s2, V2 = np.linalg.svd(train_data.T, full_matrices=False)
    svd_basis = U2.T[:10]
    project_svd_train = np.dot(svd_basis, train_data.T).T
    project_svd_test = np.dot(svd_basis, test_data.T).T

    # KNN on the first 1..10 SVD coordinates for every k.
    results_SVD.extend(
        knn(n_neighbors=n_nb, weights='uniform', algorithm='auto')
        .fit(project_svd_train[:, :dim], train_labels)
        .score(project_svd_test[:, :dim], test_labels)
        for n_nb in kk
        for dim in range(1, 11)
    )