# %% [markdown]
# # Face Recognition Using Eigenfaces Implementation

# %% [markdown]
# ### PART 1: Computation of The Eigenfaces

# %%
import os

import numpy as np
import matplotlib.image as img
from matplotlib import pyplot as plt


# %%
def load_image_matrix(directory):
    """Load every file in *directory* as one column of an image matrix.

    Args:
        directory: Path of a folder; every entry in it is read as an image.

    Returns:
        A tuple ``(file_names, image_matrix, image_shape)``: the sorted file
        names, an array holding one flattened image per column (column ``i``
        belongs to ``file_names[i]``), and the array shape of the first image.

    Raises:
        ValueError: If the directory is empty.
    """
    # os.listdir returns names in arbitrary order; sorting keeps the column
    # indices reproducible between runs and machines.
    file_names = sorted(os.listdir(directory))
    if not file_names:
        raise ValueError("no image files found in {!r}".format(directory))
    # os.path.join supplies the separator that plain string concatenation
    # silently drops when the directory has no trailing slash.
    images = [img.imread(os.path.join(directory, name)) for name in file_names]
    # Stack once; np.append inside a loop copies the whole matrix every time.
    image_matrix = np.column_stack([image.flatten() for image in images])
    return file_names, image_matrix, images[0].shape


def show_face(face_vector, shape):
    """Display a flattened image vector as a grayscale image of *shape*."""
    plt.imshow(np.reshape(face_vector, shape), interpolation='nearest')  # display the image
    plt.gray()  # grayscale conversion
    plt.show()


# %% [markdown]
# step 1 & 2: obtain face images & represent those as col vectors

# %%
# directory that holds all training files
path = 'train_file_path'

# %%
# obtain face images (training faces) & represent every image as a vector
# -> train_I is the matrix whose columns are the training image vectors
train_image_list, train_I, image_shape = load_image_matrix(path)

# %%
train_I.shape

# %% [markdown]
# step 3: compute the avg face

# %%
# Compute the avg face (one mean value per pixel)
mean = np.mean(train_I, axis=1)

# %% [markdown]
# step 4: subtract the mean face

# %%
normalized_train_I = train_I - mean.reshape(-1, 1)

# %% [markdown]
# step 6-1: consider A^T * A

# %%
train_matrix = np.dot(normalized_train_I.T, normalized_train_I)

# %% [markdown]
# step 6-2: compute the eigenvectors of A^T * A (eigen decomposition)

# %%
# A^T * A is symmetric, so use eigh: it returns real eigenvalues and
# eigenvectors, whereas eig may return complex values because of round-off.
eig_vals, eig_vecs = np.linalg.eigh(train_matrix)
# A^T * A is positive semi-definite; clip the tiny negative values that
# round-off can produce so the variance fractions below stay monotonic.
eig_vals = np.clip(eig_vals, 0, None)

# %% [markdown]
# sort the eigenvalues and eigenvectors in descending order

# %%
idx_ascending = np.argsort(eig_vals)
idx_descending = np.flip(idx_ascending)

# %%
eig_vals = eig_vals[idx_descending]
eig_vecs = eig_vecs[:, idx_descending]

# %% [markdown]
# step 6-3: compute the M best eigenvectors of A * A^T

# %%
best_eig_vecs = np.dot(normalized_train_I, eig_vecs)

# %%
# normalize the M eigenvectors
eig_vec_norms = np.linalg.norm(best_eig_vecs, axis=0)
# leave all-zero columns untouched instead of dividing by zero
eig_vec_norms[eig_vec_norms == 0] = 1.0
normalized_best_eig_vecs = best_eig_vecs / eig_vec_norms

# %% [markdown]
# step 7: select k eigenvectors

# %%
# for every target percentage of the total variance, find the smallest k
# whose k largest eigenvalues sum to more than that share
variance_arr = np.arange(0, 100, 5)
cumulative_eig_vals = np.cumsum(eig_vals)
thresholds = cumulative_eig_vals[-1] * variance_arr / 100
# side='right' yields the first index whose cumulative sum exceeds the
# threshold; the index is zero-based, so the number of components is index + 1
first_exceeding_idx = np.searchsorted(cumulative_eig_vals, thresholds, side='right')
k_arr = np.minimum(first_exceeding_idx + 1, eig_vals.shape[0]).tolist()

# %%
fig_k, ax_k = plt.subplots()
ax_k.plot(k_arr, variance_arr, linewidth=2.0)
plt.ylabel("Variance")
plt.xlabel("Number of Components (k)")
plt.xlim(-10, 200)
plt.show()

# %%
k = 60 # temp

# %%
eigenfaces = normalized_best_eig_vecs[:, :k]

# %% [markdown]
# ##### Display some of k eigenvectors

# %%
# display s = 10 eigenfaces (fewer when less than s eigenfaces are available)
s = 10
for i in range(min(s, eigenfaces.shape[1])):
    show_face(eigenfaces[:, i], image_shape)

# %% [markdown]
# ### PART 2: Face Recognition Using Eigenfaces

# %% [markdown]
# #### step 1: normalize a face image

# %% [markdown]
# create a set of image vectors from test set & pick up an image later

# %%
# directory that holds all test files
path = 'test_file_path'

# %%
# a set of image vectors from test set
test_image_list, test_I, _ = load_image_matrix(path)

# %%
# Normalize the test set with the mean face of the training set
normalized_test_I = test_I - mean.reshape(-1, 1)

# %% [markdown]
# #### step 2: project on the eigenspace

# %% [markdown]
# Compute weight vectors for the test set

# %%
# compute weight vectors for the test set (one column per test image)
test_W = np.dot(eigenfaces.T, normalized_test_I)
test_W.shape

# %% [markdown]
# Compute a set of weight vectors for the training set

# %%
# a set of weight vectors for the training set (one column per training image)
train_W = np.dot(eigenfaces.T, normalized_train_I)

# %% [markdown]
# Pick an image from the test set (an unknown image) & get a weight of the image

# %%
j = 10
unknown_img = test_I[:, j]
unknown_img_W = test_W[:, j].reshape(-1, 1) # return as a col vector

# %% [markdown]
# Find the min error using the Euclidean Distance

# %%
# compute the distances between a test img & train imgs
dist = np.linalg.norm(unknown_img_W - train_W, axis=0)

# %%
# Find the index of the closest training image
best_match_idx = np.argmin(dist)

# %% [markdown]
# #### project the test image to the eigenvector space

# %%
# a test image created by linear combination of the eigenfaces
# NOTE: the mean face is not added back, so this shows the mean-subtracted
# reconstruction of the test image.
test_hat = np.dot(eigenfaces, unknown_img_W)
show_face(test_hat, image_shape)

# %% [markdown]
# ##### The picked test image & print it

# %%
print("The file name of the test image: ", test_image_list[j])
show_face(unknown_img, image_shape)

# %% [markdown]
# #### Finding the best match(es)

# %% [markdown]
# Finding the best match

# %%
# The closest image vector in the training set, shown as an image
show_face(train_I[:, best_match_idx], image_shape)

# %% [markdown]
# Finding the best 5 matches

# %%
# indices of the (up to) five closest training images, nearest first;
# argsort also works when the training set has fewer than five images
five_smallest_sorted_idx = np.argsort(dist)[:5]

# %%
fig_five = plt.figure(figsize=(10, 10))
for i, match_idx in enumerate(five_smallest_sorted_idx):
    print(match_idx, ",", train_image_list[match_idx], dist[match_idx])
    # turn the matching image vector back into the original image
    fig_five.add_subplot(1, 5, i + 1)
    plt.imshow(np.reshape(train_I[:, match_idx], image_shape), interpolation='nearest')
# %% [markdown]
# #### Show an example of faces represented as a linear combination of k eigenvectors

# %%
idx_i = 400  # column index into the training set; must be < train_I.shape[1]
face_shape = (150, 130)  # shape every flattened face vector is reshaped to for display

# %% [markdown]
# a new image created by linear combination

# %%
# new_face created by a linear combination of the eigenfaces
# NOTE: the mean face is not added back, so this shows the mean-subtracted
# reconstruction of the training image.
new_face = np.dot(eigenfaces, train_W[:, idx_i])
reshaped_new_face = np.reshape(new_face, face_shape)
plt.imshow(reshaped_new_face, cmap='gray', interpolation='nearest')  # display the image
plt.show()

# %% [markdown]
# an original image

# %%
# The idx_i-th image vector in the training set
train_img = train_I[:, idx_i]
# turn the image vector into original image
original_img = np.reshape(train_img, face_shape)
plt.imshow(original_img, cmap='gray', interpolation='nearest')  # display the image
plt.show()

# %% [markdown]
# ## Show multiple test images with their best matches

# %%
# create figure
fig = plt.figure(figsize=(10, 5))

# setting values to rows and column variables
rows = 2 # can be replaced
columns = 6 # fixed: one test image followed by its best matches
n_matches = columns - 1

five_best_rows = []
for i in range(rows):
    # choose a random index & compute the distance to every training image
    rand_int = np.random.randint(0, len(test_image_list))
    difference = test_W[:, rand_int].reshape(-1, 1) - train_W
    euclidean_dist = np.linalg.norm(difference, axis=0)

    # Find the best indices, nearest first
    five_best_sorted_idx = np.argsort(euclidean_dist)[:n_matches]
    five_best_rows.append(five_best_sorted_idx)

    # the original test image goes into the first cell of the row
    first_cell = columns * i + 1
    original_test_img = np.reshape(test_I[:, rand_int], face_shape)
    fig.add_subplot(rows, columns, first_cell)
    plt.imshow(original_test_img, cmap='gray', interpolation='nearest')
    plt.title("{}-th Test".format(rand_int))

    # the best matches fill the remaining cells of the same row
    for rank, train_idx in enumerate(five_best_sorted_idx, start=1):
        original_img = np.reshape(train_I[:, train_idx], face_shape)
        fig.add_subplot(rows, columns, first_cell + rank)
        plt.imshow(original_img, cmap='gray', interpolation='nearest')
        plt.title("The {}-th Best".format(rank))

# one row of best-match indices per displayed test image; vstack keeps the
# result 2-D even when rows == 1
five_best = np.vstack(five_best_rows)

# %% [markdown]
# ## Accuracy

# %%
def compute_weights(k):
    """Project the test and training sets onto the first *k* eigenfaces.

    Args:
        k: Number of leading columns of ``normalized_best_eig_vecs`` to use.

    Returns:
        A tuple ``(test_W, train_W)`` of weight matrices with ``k`` rows and
        one column per test / training image.
    """
    basis = normalized_best_eig_vecs[:, :k]
    test_weights = np.dot(basis.T, normalized_test_I)
    train_weights = np.dot(basis.T, normalized_train_I)
    return test_weights, train_weights


def count_matched_image(k, label_len=5):
    """Count the test images whose nearest training image has the same label.

    Two images carry the same label when the first *label_len* characters of
    their file names are equal.

    Args:
        k: Number of eigenfaces used for the projection.
        label_len: Length of the file-name prefix that is compared.

    Returns:
        The number of test images that were matched to the right label.
    """
    test_weights, train_weights = compute_weights(k)
    count = 0
    for test_name, test_weight in zip(test_image_list, test_weights.T):
        # squared Euclidean distances; the square root is skipped because it
        # does not change which training image is the closest
        squared_dist = np.sum(np.square(test_weight.reshape(-1, 1) - train_weights), axis=0)
        nearest_name = train_image_list[np.argmin(squared_dist)]
        if test_name[:label_len] == nearest_name[:label_len]:
            count += 1
    return count

# %%
count_arr = [count_matched_image(k_i) for k_i in k_arr]

# %%
accuracy = np.array(count_arr) / test_I.shape[1]
accuracy = np.round(accuracy, 2)

# %%
# plot
fig_acc, ax = plt.subplots()
ax.plot(k_arr, accuracy, linewidth=2.0)
plt.show()

# %%
# recount for the selected k: the counts collected above belong to the values
# in k_arr, not to k
count = count_matched_image(k)
accuracy_for_k = count / test_I.shape[1]
print("When k =", k, ", the accuracy is", accuracy_for_k)