import os import cv2 import random import numpy as np import matplotlib.pyplot as plt import math ##### Part 1: image preprossessing ##### def rgb2gray(img): """ 5 points Convert a colour image greyscale Use (R,G,B)=(0.299, 0.587, 0.114) as the weights for red, green and blue channels respectively :param img: numpy.ndarray (dtype: np.uint8) :return gray_image: numpy.ndarray (dtype:np.uint8) """ if len(img.shape) != 3: print('RGB Image should have 3 channels') return img_gray = np.ndarray(shape=(img.shape[0], img.shape[1]), dtype=np.uint8) for y in range(img.shape[0]): for x in range(img.shape[1]): img_gray[y][x] = 0.299 * img[y][x][0] + 0.587 * img[y][x][1] + 0.114 * img[y][x][2] return img_gray def gray2grad(img): """ 5 points Estimate the gradient map from the grayscale images by convolving with Sobel filters (horizontal and vertical gradients) and Sobel-like filters (gradients oriented at 45 and 135 degrees) The coefficients of Sobel filters are provided in the code below. :param img: numpy.ndarray :return img_grad_h: horizontal gradient map. numpy.ndarray :return img_grad_v: vertical gradient map. numpy.ndarray :return img_grad_d1: diagonal gradient map 1. numpy.ndarray :return img_grad_d2: diagonal gradient map 2. numpy.ndarray """ sobelh = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype = float) sobelv = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype = float) sobeld1 = np.array([[-2, -1, 0], [-1, 0, 1], [0, 1, 2]], dtype = float) sobeld2 = np.array([[0, -1, -2], [1, 0, -1], [2, 1, 0]], dtype = float) img_y = img.shape[0] img_x = img.shape[1] def helper(kernel): new_img = np.ndarray(shape=(img_y, img_x), dtype=float) flipped = np.flip(kernel) for y in range(img_y): for x in range(img_x): # elem_mult = [[kernel[u][v] * img[y-(u-1)][x-(v-1)] for u in range(2)] for v in range(2)] # new_img[y - 1][x - 1] = np.sum(elem_mult) window = img[y:y+3, x:x+3] new_img[y][x] = np.sum(window * flipped) # elem-wise mult return new_img img = pad_zeros(img, 1, 1, 1, 1) img_grad_h = helper(sobelh) img_grad_v = helper(sobelv) img_grad_d1 = helper(sobeld1) img_grad_d2 = helper(sobeld2) return img_grad_h, img_grad_v, img_grad_d1, img_grad_d2 def pad_zeros(img, pad_height_bef, pad_height_aft, pad_width_bef, pad_width_aft): """ 5 points Add a border of zeros around the input images so that the output size will match the input size after a convolution or cross-correlation operation. e.g., given matrix [[1]] with pad_height_bef=1, pad_height_aft=2, pad_width_bef=3 and pad_width_aft=4, obtains: [[0 0 0 0 0 0 0 0] [0 0 0 1 0 0 0 0] [0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0]] :param img: numpy.ndarray :param pad_height_bef: int :param pad_height_aft: int :param pad_width_bef: int :param pad_width_aft: int :return img_pad: numpy.ndarray. dtype is the same as the input img. """ height, width = img.shape[:2] new_height, new_width = (height + pad_height_bef + pad_height_aft), (width + pad_width_bef + pad_width_aft) img_pad = np.zeros((new_height, new_width)) if len(img.shape) == 2 else np.zeros((new_height, new_width, img.shape[2])) img_pad[pad_height_bef:pad_height_bef+height, pad_width_bef: pad_width_bef+width] = img return img_pad.astype(img.dtype) ##### Part 2: Normalized Cross Correlation ##### def normalized_cross_correlation(img, template): """ 10 points. Implement the cross-correlation operation in a naive 6 nested for-loops. The 6 loops include the height, width, channel of the output and height, width and channel of the template. :param img: numpy.ndarray. :param template: numpy.ndarray. :return response: numpy.ndarray. dtype: float """ Hi, Wi = img.shape[:2] Hk, Wk = template.shape[:2] Ho = Hi - Hk + 1 Wo = Wi - Wk + 1 ### YOUR CODE HERE img = img.astype(float) template = template.astype(float) no_of_channels = (1 if len(img.shape) == 2 else img.shape[2]) template_magnitude = np.linalg.norm(template) response = np.ndarray(shape=(Ho, Wo), dtype=float) for y in range(Ho): for x in range(Wo): xij = 0.0 wij_magnitude = 0.0 window = img[y:y+Hk, x:x+Wk] for u in range(Hk): for v in range(Wk): for c in range(no_of_channels): if no_of_channels == 1: xij += window[u][v] * template[u][v] wij_magnitude += window[u][v] ** 2 else: xij += window[u][v][c] * template[u][v][c] wij_magnitude += window[u][v][c] ** 2 norm_factor = 1 / (template_magnitude * math.sqrt(wij_magnitude)) xij = xij * norm_factor response[y][x] = xij return response def normalized_cross_correlation_fast(img, template): """ 10 points. Implement the cross correlation with 3 nested for-loops. The for-loop over the template is replaced with the element-wise multiplication between the kernel and the image regions. :param img: numpy.ndarray :param template: numpy.ndarray :return response: numpy.ndarray. dtype: float """ Hi, Wi = img.shape[:2] Hk, Wk = template.shape[:2] Ho = Hi - Hk + 1 Wo = Wi - Wk + 1 ###Your code here### img = img.astype(float) template = template.astype(float) no_of_channels = (1 if len(img.shape) == 2 else img.shape[2]) template_magnitude = np.linalg.norm(template) response = np.ndarray(shape=(Ho, Wo), dtype=float) for y in range(Ho): for x in range(Wo): xij = 0 window = img[y:y+Hk, x:x+Wk] wij_magnitude = np.linalg.norm(window) for c in range(no_of_channels): if no_of_channels > 1: xij += np.sum(window[:,:,c] * template[:,:,c]) else: xij += np.sum(window * template) norm_factor = 1 / (template_magnitude * wij_magnitude) response[y][x] = xij * norm_factor return response def normalized_cross_correlation_matrix(img, template): """ 10 points. Converts cross-correlation into a matrix multiplication operation to leverage optimized matrix operations. Please check the detailed instructions in the pdf file. :param img: numpy.ndarray :param template: numpy.ndarray :return response: numpy.ndarray. dtype: float """ Hi, Wi = img.shape[:2] Hk, Wk = template.shape[:2] Ho = Hi - Hk + 1 Wo = Wi - Wk + 1 ###Your code here### img = img.astype(float) template = template.astype(float) no_of_channels = (1 if len(img.shape) == 2 else img.shape[2]) template_magnitude = math.sqrt(np.sum(np.square(template))) # Reshaping template Fr = np.reshape(template, (no_of_channels * Hk * Wk, 1)) # Kernel of 1s norm_kernel = np.ones((no_of_channels * Hk * Wk, 1), dtype=float) # Reshaping input image Pr = np.zeros((Ho*Wo, Hk*Wk)) if no_of_channels == 1 else np.zeros((Ho*Wo, 3*Hk*Wk)) row_ratio = Wi - Wk + 1 # no of rows in reshaped image for each row in input image for row in range(Ho*Wo): row_in_original_img = row // row_ratio col_in_original_img = row % row_ratio window = img[row_in_original_img:row_in_original_img+Hk, col_in_original_img:col_in_original_img+Wk] Pr[row] = np.ravel(window) # this works for 3D array -> 1D array too window_magnitude = math.sqrt(np.matmul(np.square(Pr[row]), norm_kernel)) Pr[row] = Pr[row] / window_magnitude # norm_vector = np.sqrt(np.matmul(np.square(Pr), norm_kernel)) Xr = np.matmul(Pr, Fr, dtype=float) / template_magnitude response = np.reshape(Xr, (Ho, Wo)) return response ##### Part 3: Non-maximum Suppression ##### def non_max_suppression(response, suppress_range, threshold=None): """ 10 points Implement the non-maximum suppression for translation symmetry detection The general approach for non-maximum suppression is as follows: 1. Set a threshold τ; values in X<τ will not be considered. Set X<τ to 0. 2. While there are non-zero values in X a. Find the global maximum in X and record the coordinates as a local maximum. b. Set a small window of size w×w points centered on the found maximum to 0. 3. Return all recorded coordinates as the local maximum. :param response: numpy.ndarray, output from the normalized cross correlation :param suppress_range: a tuple of two ints (H_range, W_range). the points around the local maximum point within this range are set as 0. In this case, there are 2*H_range*2*W_range points including the local maxima are set to 0 :param threshold: int, points with value less than the threshold are set to 0 :return res: a sparse response map which has the same shape as response """ ###Your code here### if threshold != None: response[response < threshold] = 0.0 H_range, W_range = suppress_range res = np.zeros(response.shape) while np.count_nonzero(response) > 0: curr_y, curr_x = np.unravel_index(np.argmax(response), response.shape) res[curr_y][curr_x] = response[curr_y][curr_x] response[max(curr_y-H_range,0):min(curr_y+H_range,response.shape[0]), max(curr_x-W_range,0):min(curr_x+W_range,response.shape[1])] = 0 return res ##### Part 4: Question And Answer ##### def normalized_cross_correlation_ms(img, template): """ 10 points Please implement mean-subtracted cross correlation which corresponds to OpenCV TM_CCOEFF_NORMED. For simplicty, use the "fast" version. :param img: numpy.ndarray :param template: numpy.ndarray :return response: numpy.ndarray. dtype: float """ Hi, Wi = img.shape[:2] Hk, Wk = template.shape[:2] Ho = Hi - Hk + 1 Wo = Wi - Wk + 1 ###Your code here### img = img.astype(float) template = template.astype(float) no_of_channels = (1 if len(img.shape) == 2 else img.shape[2]) # Creating mean-subtracted template if no_of_channels > 1: for c in range(no_of_channels): templ_mean = np.mean(template[:,:,c]) template[:,:,c] = template[:,:,c] - templ_mean else: templ_mean = np.mean(template) template = template - templ_mean template_magnitude = np.linalg.norm(template) response = np.ndarray(shape=(Ho, Wo), dtype=float) for y in range(Ho): for x in range(Wo): xij = 0 window = img[y:y+Hk, x:x+Wk] wij_magnitude = 0.0 for c in range(no_of_channels): if no_of_channels > 1: window_mean = np.sum(window[:,:,c]) / (Hk*Wk) window_mean_subtracted = window[:,:,c] - window_mean xij += np.sum(window_mean_subtracted * template[:,:,c]) wij_magnitude += np.sum(np.square(window_mean_subtracted)) else: window_mean = np.sum(window) / (Hk*Wk) window_mean_subtracted = window - window_mean xij += np.sum(window_mean_subtracted * template) wij_magnitude += np.sum(np.square(window_mean_subtracted)) norm_factor = 1 / (template_magnitude * np.sqrt(wij_magnitude)) response[y][x] = xij * norm_factor return response ############################################### """Helper functions: You should not have to touch the following functions. """ def read_img(filename): ''' Read HxWxC image from the given filename :return img: numpy.ndarray, size (H, W, C) for RGB. The value is between [0, 255]. ''' img = cv2.imread(filename) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) return img def show_imgs(imgs, titles=None): ''' Display a list of images in the notebook cell. :param imgs: a list of images or a single image ''' if isinstance(imgs, list) and len(imgs) != 1: n = len(imgs) fig, axs = plt.subplots(1, n, figsize=(15,15)) for i in range(n): axs[i].imshow(imgs[i], cmap='gray' if len(imgs[i].shape) == 2 else None) if titles is not None: axs[i].set_title(titles[i]) else: img = imgs[0] if (isinstance(imgs, list) and len(imgs) == 1) else imgs plt.figure() plt.imshow(img, cmap='gray' if len(img.shape) == 2 else None) def show_img_with_squares(response, img_ori=None, rec_shape=None): ''' Draw small red rectangles of size defined by rec_shape around the non-zero points in the image. Display the rectangles and the image with rectangles in the notebook cell. :param response: numpy.ndarray. The input response should be a very sparse image with most of points as 0. The response map is from the non-maximum suppression. :param img_ori: numpy.ndarray. The original image where response is computed from :param rec_shape: a tuple of 2 ints. The size of the red rectangles. ''' response = response.copy() if img_ori is not None: img_ori = img_ori.copy() H, W = response.shape[:2] if rec_shape is None: h_rec, w_rec = 25, 25 else: h_rec, w_rec = rec_shape xs, ys = response.nonzero() for x, y in zip(xs, ys): response = cv2.rectangle(response, (y - h_rec//2, x - w_rec//2), (y + h_rec//2, x + w_rec//2), (255, 0, 0), 2) if img_ori is not None: img_ori = cv2.rectangle(img_ori, (y - h_rec//2, x - w_rec//2), (y + h_rec//2, x + w_rec//2), (0, 255, 0), 2) if img_ori is not None: show_imgs([response, img_ori]) else: show_imgs(response)