# %% [markdown]
# # Face Recognition Using Eigenfaces Implementation 

# %% [markdown]
# ### PART 1: Computation of The Eigenfaces

# %%
import os
import numpy as np
import matplotlib.image as img
from matplotlib import pyplot as plt

# %% [markdown]
# step 1 & 2: obtain face images & represent those as col vectors

# %%
# list of all training files in a directory
path = 'train_file_path'
train_image_list = os.listdir(path)

# %%
# Obtain the training faces and represent every image as a column vector;
# the columns are stacked side by side into one matrix (one column per image).

# Keep the directory in its own name so the file paths below are always
# built from the directory that was actually listed.
train_dir = path

# Read every file once and stack all columns in a single call; growing the
# matrix with np.append inside the loop re-copies it on every iteration.
train_columns = []
for file_name in train_image_list:
    # os.path.join inserts the path separator that plain string
    # concatenation leaves out.
    file_path = os.path.join(train_dir, file_name)
    img_vector = img.imread(file_path).flatten().reshape(-1, 1)
    train_columns.append(img_vector)

# np.hstack raises ValueError if the directory contained no files.
train_I = np.hstack(train_columns)

# %%
train_I.shape

# %% [markdown]
# step 3: compute the avg face

# %%
# The average face: the mean of every pixel row taken across all image columns
mean = train_I.mean(axis=1)

# %% [markdown]
# step 4: subtract the mean face

# %%
# turn the mean into a column so it broadcasts over every image column
normalized_train_I = train_I - mean[:, np.newaxis]

# %% [markdown]
# step 6-1: consider A^T * A

# %%
# A is the mean-subtracted training matrix; A^T * A is square with one
# row/column per training image
train_matrix = normalized_train_I.T.dot(normalized_train_I)

# %% [markdown]
# step 6-2: compute the eigenvectors of A^T * A (eigen decomposition)

# %%
# train_matrix is A^T * A and therefore symmetric, so use the symmetric
# solver.  eigh returns real eigenvalues and real eigenvectors, whereas
# the general-purpose eig may return complex values on such input because
# of round-off, which breaks the sorting and plotting that follow.
# eigh returns the eigenvalues in ascending order; they are re-sorted below.
eig_vals, eig_vecs = np.linalg.eigh(train_matrix)

# %% [markdown]
# sort the eigenvalues and eigenvectors in descending order of eigenvalue

# %%
# argsort orders ascending; reading that order backwards gives descending
idx_ascending = np.argsort(eig_vals)
idx_descending = idx_ascending[::-1]

# %%
# reorder the eigenvalues and the matching eigenvector columns together
eig_vals, eig_vecs = eig_vals[idx_descending], eig_vecs[:, idx_descending]

# %% [markdown]
# step 6-3: compute the M best eigenvectors of A * A^T

# %%
# map every eigenvector v of A^T * A to A v (A = mean-subtracted training set)
best_eig_vecs = normalized_train_I.dot(eig_vecs)

# %%
# scale every column to unit Euclidean length
column_norms = np.linalg.norm(best_eig_vecs, axis=0)
normalized_best_eig_vecs = best_eig_vecs / column_norms

# %% [markdown]
# step 7: select k eigenvectors

# %%
# For every target variance level, find the smallest number of leading
# eigenvalues whose sum exceeds that share of the total.
variance_arr = np.arange(0, 100, 5)  # target levels, in percent
k_arr = []

# The total does not depend on the target level, so compute it once.
total_sum_eig_val = np.sum(eig_vals)

for var in variance_arr:
    threshold = total_sum_eig_val * (var / 100)
    accumulated_sum_eig_val = 0

    # Fall back to all components if the threshold is never exceeded, so
    # k_arr always has exactly one entry per entry of variance_arr.
    k = eig_vals.shape[0]

    # Count from 1: after adding the eigenvalue at index i, i + 1
    # components have been used, so the component count is i + 1, not i.
    for n_components, eig_val in enumerate(eig_vals, start=1):
        accumulated_sum_eig_val += eig_val
        if accumulated_sum_eig_val > threshold:
            k = n_components
            break

    k_arr.append(k)

# %%
# variance level (percent) against the number of components found for it
fig_k, ax_k = plt.subplots()
ax_k.plot(k_arr, variance_arr, linewidth=2.0)
ax_k.set_ylabel("Variance")
ax_k.set_xlabel("Number of Components (k)")
ax_k.set_xlim(-10, 200)
plt.show()

# %%
k = 60 # temp

# %%
# keep the first k columns, i.e. the eigenvectors of the k largest eigenvalues
eigenfaces = normalized_best_eig_vecs[:, :k]

# %% [markdown]
# ##### Display some of k eigenvectors

# %%
# show the first s = 10 eigenfaces, one figure each
s = 10

for face_no in range(s):
    # every column is a flattened image; restore the 150 x 130 layout
    eigenface = eigenfaces[:, face_no].reshape(150, 130)

    plt.imshow(eigenface, interpolation='nearest')  # draw the image
    plt.gray()  # switch to the grayscale colormap
    plt.show()

# %% [markdown]
# ### PART 2: Face Recognition Using Eigenfaces

# %% [markdown]
# #### step 1: normalize a face image 

# %% [markdown]
# create a set of image vectors from the test set & pick an image from it later

# %%
# list of all test files in a directory
path = 'test_file_path'
test_image_list = os.listdir(path)

# %%
# a set of image vectors from the test set (one column per image)

# Keep the directory in its own name so the file paths below are always
# built from the directory that was actually listed.
test_dir = path

# Read every file once and stack all columns in a single call; growing the
# matrix with np.append inside the loop re-copies it on every iteration.
test_columns = []
for file_name in test_image_list:
    # os.path.join inserts the path separator that plain string
    # concatenation leaves out.
    file_path = os.path.join(test_dir, file_name)
    img_vector = img.imread(file_path).flatten().reshape(-1, 1)
    test_columns.append(img_vector)

# np.hstack raises ValueError if the directory contained no files.
test_I = np.hstack(test_columns)

# %%
# Normalize the test set by subtracting the mean face of the training set
normalized_test_I = test_I - mean.reshape(-1, 1)

# %% [markdown]
# #### step 2: project on the eigenspace

# %% [markdown]
# Compute weight vectors for the test set

# %%
# project the mean-subtracted test images onto the eigenfaces
# (one weight column per test image)
test_W = eigenfaces.T.dot(normalized_test_I)
test_W.shape

# %% [markdown]
# Compute a set of weight vectors for the training set

# %%
# the same projection applied to the mean-subtracted training images
train_W = eigenfaces.T.dot(normalized_train_I)

# %% [markdown]
# Pick an image from the test set (an unknown image) & get a weight of the image

# %%
j = 10
unknown_img = test_I[:, j]
unknown_img_W = test_W[:, j][:, np.newaxis] # kept as a column vector

# %% [markdown]
# Find the min error using the Euclidean Distance

# %%
# Euclidean distance between the test image's weights and the weights of
# every training image (the column vector broadcasts across train_W)
diff = unknown_img_W - train_W
dist = np.square(diff).sum(axis=0) ** 0.5

# %%
# l = index of the training image with the smallest distance
l = np.argmin(dist)

# %% [markdown]
# #### project the test image to the eigenvector space

# %%
# weighted sum of the eigenfaces, using the picked test image's weights
# (note: the mean face is not added back to the result)
test_hat = eigenfaces.dot(unknown_img_W)

reconstructed_test_hat = test_hat.reshape(150, 130)

plt.imshow(reconstructed_test_hat, interpolation='nearest')  # draw the image
plt.gray()  # switch to the grayscale colormap
plt.show()

# %% [markdown]
# ##### Print the file name of the picked test image & display it

# %%
print("The file name of the test image: ", test_image_list[j])

# restore the 150 x 130 layout of the flattened test image
original_test_img = unknown_img.reshape(150, 130)

plt.imshow(original_test_img, interpolation='nearest')  # draw the image
plt.gray()  # switch to the grayscale colormap
plt.show()

# %% [markdown]
# #### Finding the best match(es)

# %% [markdown]
# Finding the best match

# %%
# column l of the training set: the image closest to the test image
train_img = train_I[:, l]

# restore the 150 x 130 layout of the flattened image
original_img = train_img.reshape(150, 130)

plt.imshow(original_img, interpolation='nearest')  # draw the image
plt.gray()  # switch to the grayscale colormap
plt.show()

# %% [markdown]
# Finding the best 5 matches

# %%
# argpartition moves the five smallest distances to the front (unordered);
# only those five are then sorted by distance
five_smallest_idx = np.argpartition(dist, 4)[:5]
five_smallest_sorted_idx = five_smallest_idx[np.argsort(dist[five_smallest_idx])]

# %%
fig_five = plt.figure(figsize=(10, 10))

for position, match_idx in enumerate(five_smallest_sorted_idx, start=1):
    # index, file name and distance of this match
    print(match_idx, ",", train_image_list[match_idx], dist[match_idx])

    # the matched training image, restored to its 150 x 130 layout
    original_img = train_I[:, match_idx].reshape(150, 130)

    # one row of five panels, closest match first
    fig_five.add_subplot(1, 5, position)

    plt.imshow(original_img, interpolation='nearest')  # draw the image

# %% [markdown]
# #### Show an example of faces represented as a linear combination of k eigenvectors

# %%
idx_i = 400

# %% [markdown]
# a new image created by linear combination

# %%
# weighted sum of the eigenfaces, using the weights of training image idx_i
# (note: the mean face is not added back to the result)
new_face = eigenfaces.dot(train_W[:, idx_i])
reshaped_new_face = new_face.reshape(150, 130)

plt.imshow(reshaped_new_face, interpolation='nearest')  # draw the image
plt.gray()  # switch to the grayscale colormap
plt.show()

# %% [markdown]
# an original image

# %%
# the training image at index idx_i, for comparison
train_img = train_I[:, idx_i]

# restore the 150 x 130 layout of the flattened image
original_img = train_img.reshape(150, 130)

plt.imshow(original_img, interpolation='nearest')  # draw the image
plt.gray()  # switch to the grayscale colormap
plt.show()

# %% [markdown]
# ## Show multiple test images with their best matches

# %%
# create figure
fig = plt.figure(figsize=(10, 5))

# One figure row per randomly chosen test image: the test image itself in
# the first column, followed by its five closest training images.
rows = 2 # can be replaced
columns = 6 # fixed: 1 test image + 5 best matches

# Match indices are collected per row and stacked once after the loop, so
# the result is two-dimensional for every value of rows (including 1).
best_per_row = []

# The loop names differ from i / j so the notebook-level i and j (j selects
# the picked test image in earlier cells) are not overwritten here.
for row in range(rows):
    # choose a random test image & compute its distance to every training image
    rand_int = np.random.randint(0, len(test_image_list))
    difference = test_W[:, rand_int].reshape(-1, 1) - train_W
    euclidean_dist = np.sum(np.square(difference), axis=0) ** (1/2)

    # indices of the five closest training images, closest first
    five_best_idx = np.argpartition(euclidean_dist, 4)[:5]
    five_best_sorted_idx = five_best_idx[np.argsort(euclidean_dist[five_best_idx])]
    best_per_row.append(five_best_sorted_idx)

    # subplot positions are 1-based and run row by row
    first_cell = (columns * row) + 1

    # the original test image
    original_test_img = test_I[:, rand_int].reshape(150, 130)
    fig.add_subplot(rows, columns, first_cell)
    plt.imshow(original_test_img, interpolation='nearest')
    plt.title("{}-th Test".format(rand_int))

    # the five best matches, in the remaining cells of the same row
    for rank, train_idx in enumerate(five_best_sorted_idx, start=1):
        original_img = train_I[:, train_idx].reshape(150, 130)
        fig.add_subplot(rows, columns, first_cell + rank)
        plt.imshow(original_img, interpolation='nearest')
        plt.title("The {}-th Best".format(rank))

# (rows, 5) table of the match indices, one row per displayed test image
five_best = np.array(best_per_row).reshape(-1, 5)

# %% [markdown]
# ## Accuracy

# %%
def compute_weights(k):
    """Project the test and training sets onto the first ``k`` eigenfaces.

    Reads the module-level ``normalized_best_eig_vecs``,
    ``normalized_test_I`` and ``normalized_train_I``.

    Args:
        k: number of leading columns of ``normalized_best_eig_vecs`` to use.

    Returns:
        A ``(test_W, train_W)`` tuple of weight matrices, each with one
        column per image.
    """
    # transpose once and reuse it for both projections
    basis_t = normalized_best_eig_vecs[:, :k].T

    projected_test = basis_t.dot(normalized_test_I)
    projected_train = basis_t.dot(normalized_train_I)

    return projected_test, projected_train
    

def count_matched_image(k):
    """Count the test images whose nearest training image has a matching name.

    Every test image is compared, in the space of the first ``k``
    eigenfaces, with all training images; the training image at the
    smallest Euclidean distance is taken as its match.  A match counts when
    the first five characters of the two file names are equal (presumably
    that prefix identifies the person -- confirm against the data set's
    naming scheme).

    Reads the module-level ``test_I``, ``test_image_list`` and
    ``train_image_list``.

    Args:
        k: number of eigenfaces passed on to ``compute_weights``.

    Returns:
        The number of test images that were matched.
    """
    test_W, train_W = compute_weights(k)
    matched = 0

    for col in range(test_I.shape[1]):
        # weights of this test image as a column, so it broadcasts over train_W
        unknown_img_W = test_W[:, col][:, np.newaxis]

        # Euclidean distance to every training image
        dist = np.square(unknown_img_W - train_W).sum(axis=0) ** 0.5

        # closest training image
        nearest = np.argmin(dist)

        if test_image_list[col][:5] == train_image_list[nearest][:5]:
            matched += 1

    return matched

# %%
# number of correctly matched test images for every k in k_arr
count_arr = [count_matched_image(k_i) for k_i in k_arr]

# %%
# fraction of test images matched, rounded to two decimals
accuracy = np.array(count_arr) / test_I.shape[1]
accuracy = np.round(accuracy, 2)

# %%
# plot
fig_acc, ax = plt.subplots()
ax.plot(k_arr, accuracy, linewidth=2.0)
plt.show()

# %%
# Count the matches for the selected k itself: the entries of count_arr
# belong to the values in k_arr, so none of them can be reported for k
# unless k happens to be one of those values.
accuracy_for_k = count_matched_image(k) / test_I.shape[1]
print("When k =", k, ", the accuracy is", accuracy_for_k)