import os
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
import math

##### Part 1: Image Preprocessing #####

def rgb2gray(img):
    """
    5 points
    Convert a colour image to greyscale
    Use (R,G,B)=(0.299, 0.587, 0.114) as the weights for red, green and blue channels respectively
    :param img: numpy.ndarray (dtype: np.uint8)
    :return gray_image: numpy.ndarray (dtype:np.uint8)
    """
    if len(img.shape) != 3:
        print('RGB Image should have 3 channels')
        return
    
    img_gray = np.ndarray(shape=(img.shape[0], img.shape[1]), dtype=np.uint8)
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            img_gray[y][x] = 0.299 * img[y][x][0] + 0.587 * img[y][x][1] + 0.114 * img[y][x][2]
    return img_gray
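

# A minimal, hedged sanity check (not part of the graded lab): cv2.cvtColor with
# COLOR_RGB2GRAY uses the same 0.299/0.587/0.114 weights, so the two results should
# agree up to rounding (the loop above truncates, OpenCV rounds).
def _check_rgb2gray(img):
    reference = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    ours = rgb2gray(img)
    # Maximum absolute per-pixel difference; expect 0 or 1.
    return int(np.max(np.abs(ours.astype(int) - reference.astype(int))))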


def gray2grad(img):
    """
    5 points
    Estimate the gradient maps from the grayscale image by convolving with Sobel filters (horizontal and vertical gradients) and Sobel-like filters (gradients oriented at 45 and 135 degrees).
    The coefficients of the Sobel filters are provided in the code below.
    :param img: numpy.ndarray
    :return img_grad_h: horizontal gradient map. numpy.ndarray
    :return img_grad_v: vertical gradient map. numpy.ndarray
    :return img_grad_d1: diagonal gradient map 1. numpy.ndarray
    :return img_grad_d2: diagonal gradient map 2. numpy.ndarray
    """
    sobelh = np.array([[-1, 0, 1], 
                       [-2, 0, 2], 
                       [-1, 0, 1]], dtype = float)
    sobelv = np.array([[-1, -2, -1], 
                       [0, 0, 0], 
                       [1, 2, 1]], dtype = float)
    sobeld1 = np.array([[-2, -1, 0],
                        [-1, 0, 1],
                        [0,  1, 2]], dtype = float)
    sobeld2 = np.array([[0, -1, -2],
                        [1, 0, -1],
                        [2, 1, 0]], dtype = float)
    
    img_y = img.shape[0] 
    img_x = img.shape[1] 

    # Pad by one pixel on each side so the gradient maps keep the input size.
    img = pad_zeros(img, 1, 1, 1, 1)

    def helper(kernel):
        # True convolution: flip the kernel, then slide it over the zero-padded image.
        new_img = np.ndarray(shape=(img_y, img_x), dtype=float)
        flipped = np.flip(kernel)
        for y in range(img_y):
            for x in range(img_x):
                # 3x3 window of the padded image centred on pixel (y, x) of the original
                window = img[y:y+3, x:x+3]
                new_img[y][x] = np.sum(window * flipped)  # element-wise multiply, then sum
        return new_img
    img_grad_h = helper(sobelh)
    img_grad_v = helper(sobelv)
    img_grad_d1 = helper(sobeld1)
    img_grad_d2 = helper(sobeld2)

    return img_grad_h, img_grad_v, img_grad_d1, img_grad_d2
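

# A minimal, hedged sanity check (assumes SciPy is installed; it is not required by
# the lab itself): gray2grad performs true convolution with zero padding, which is
# exactly what scipy.signal.convolve2d does with mode='same' and boundary='fill'.
def _check_gray2grad(img_gray):
    from scipy.signal import convolve2d
    sobelh = np.array([[-1, 0, 1],
                       [-2, 0, 2],
                       [-1, 0, 1]], dtype=float)
    ref_h = convolve2d(img_gray.astype(float), sobelh, mode='same', boundary='fill')
    grad_h, _, _, _ = gray2grad(img_gray)
    return np.allclose(grad_h, ref_h)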

def pad_zeros(img, pad_height_bef, pad_height_aft, pad_width_bef, pad_width_aft):
    """
    5 points
    Add a border of zeros around the input images so that the output size will match the input size after a convolution or cross-correlation operation.
    e.g., given matrix [[1]] with pad_height_bef=1, pad_height_aft=2, pad_width_bef=3 and pad_width_aft=4, obtains:
    [[0 0 0 0 0 0 0 0]
    [0 0 0 1 0 0 0 0]
    [0 0 0 0 0 0 0 0]
    [0 0 0 0 0 0 0 0]]
    :param img: numpy.ndarray
    :param pad_height_bef: int
    :param pad_height_aft: int
    :param pad_width_bef: int
    :param pad_width_aft: int
    :return img_pad: numpy.ndarray. dtype is the same as the input img. 
    """
    height, width = img.shape[:2]
    new_height = height + pad_height_bef + pad_height_aft
    new_width = width + pad_width_bef + pad_width_aft
    if len(img.shape) == 2:
        img_pad = np.zeros((new_height, new_width), dtype=img.dtype)
    else:
        img_pad = np.zeros((new_height, new_width, img.shape[2]), dtype=img.dtype)

    img_pad[pad_height_bef:pad_height_bef+height, pad_width_bef:pad_width_bef+width] = img

    return img_pad
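

# A minimal, hedged equivalence check: for a 2-D image, pad_zeros should match
# np.pad in constant (zero) mode; a colour image would need an extra (0, 0) entry
# for the channel axis.
def _check_pad_zeros(img_gray):
    ref = np.pad(img_gray, ((1, 2), (3, 4)), mode='constant', constant_values=0)
    return np.array_equal(pad_zeros(img_gray, 1, 2, 3, 4), ref)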




##### Part 2: Normalized Cross Correlation #####
def normalized_cross_correlation(img, template):
    """
    10 points.
    Implement the cross-correlation operation naively with 6 nested for-loops.
    The 6 loops cover the height, width and channel of the output and the height, width and channel of the template.
    :param img: numpy.ndarray.
    :param template: numpy.ndarray.
    :return response: numpy.ndarray. dtype: float
    """
    
    Hi, Wi = img.shape[:2]
    Hk, Wk = template.shape[:2]
    Ho = Hi - Hk + 1  
    Wo = Wi - Wk + 1

    ### YOUR CODE HERE
    img = img.astype(float)
    template = template.astype(float)

    no_of_channels = (1 if len(img.shape) == 2 else img.shape[2])
    template_magnitude = np.linalg.norm(template)

    response = np.ndarray(shape=(Ho, Wo), dtype=float)
    for y in range(Ho):
        for x in range(Wo):
            xij = 0.0
            wij_magnitude = 0.0
            window = img[y:y+Hk, x:x+Wk]
            for u in range(Hk):
                for v in range(Wk):
                    for c in range(no_of_channels):
                        if no_of_channels == 1:
                            xij += window[u][v] * template[u][v]
                            wij_magnitude += window[u][v] ** 2
                        else:
                            xij += window[u][v][c] * template[u][v][c]
                            wij_magnitude += window[u][v][c] ** 2
            norm_factor = 1 / (template_magnitude * math.sqrt(wij_magnitude))
            xij = xij * norm_factor 
            response[y][x] = xij
    return response
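

# A minimal, hedged cross-check: cv2.matchTemplate with TM_CCORR_NORMED computes the
# same normalised cross-correlation (it accepts uint8 or float32 inputs), so the two
# response maps should agree to floating-point tolerance.
def _check_ncc(img, template):
    ref = cv2.matchTemplate(img.astype(np.float32), template.astype(np.float32),
                            cv2.TM_CCORR_NORMED)
    return np.allclose(normalized_cross_correlation(img, template), ref, atol=1e-4)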



def normalized_cross_correlation_fast(img, template):
    """
    10 points.
    Implement the cross correlation with 3 nested for-loops.
    The for-loops over the template are replaced with an element-wise multiplication between the template and each image region.
    :param img: numpy.ndarray
    :param template: numpy.ndarray
    :return response: numpy.ndarray. dtype: float
    """
    Hi, Wi = img.shape[:2]
    Hk, Wk = template.shape[:2]
    Ho = Hi - Hk + 1
    Wo = Wi - Wk + 1

    ###Your code here###
    img = img.astype(float)
    template = template.astype(float)

    no_of_channels = (1 if len(img.shape) == 2 else img.shape[2])
    template_magnitude = np.linalg.norm(template)

    response = np.ndarray(shape=(Ho, Wo), dtype=float)
    for y in range(Ho):
        for x in range(Wo):
            xij = 0
            window = img[y:y+Hk, x:x+Wk]
            wij_magnitude = np.linalg.norm(window)
            for c in range(no_of_channels):
                if no_of_channels > 1:
                    xij += np.sum(window[:,:,c] * template[:,:,c])
                else:
                    xij += np.sum(window * template)
            norm_factor = 1 / (template_magnitude * wij_magnitude)
            response[y][x] = xij * norm_factor
    return response
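

# A hedged consistency/timing sketch: the "fast" version should return the same
# response map as the naive one, only faster; absolute timings depend on the image
# and template sizes.
def _check_ncc_fast(img, template):
    import time
    t0 = time.time()
    slow = normalized_cross_correlation(img, template)
    t1 = time.time()
    fast = normalized_cross_correlation_fast(img, template)
    t2 = time.time()
    print('naive: %.2fs, fast: %.2fs' % (t1 - t0, t2 - t1))
    return np.allclose(slow, fast)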




def normalized_cross_correlation_matrix(img, template):
    """
    10 points.
    Converts cross-correlation into a matrix multiplication operation to leverage optimized matrix operations.
    Please check the detailed instructions in the pdf file.
    :param img: numpy.ndarray
    :param template: numpy.ndarray
    :return response: numpy.ndarray. dtype: float
    """
    Hi, Wi = img.shape[:2]
    Hk, Wk = template.shape[:2]
    Ho = Hi - Hk + 1
    Wo = Wi - Wk + 1

    ###Your code here###
    img = img.astype(float)
    template = template.astype(float)

    no_of_channels = (1 if len(img.shape) == 2 else img.shape[2])
    template_magnitude = math.sqrt(np.sum(np.square(template)))

    # Reshaping template
    Fr = np.reshape(template, (no_of_channels * Hk * Wk, 1))

    # Kernel of 1s
    norm_kernel = np.ones((no_of_channels * Hk * Wk, 1), dtype=float)

    # Reshaping input image
    # Each output position becomes one row of Pr holding the flattened image window.
    Pr = np.zeros((Ho*Wo, no_of_channels * Hk * Wk))
    row_ratio = Wo   # number of sliding-window positions per image row
    for row in range(Ho*Wo): 
        row_in_original_img = row // row_ratio
        col_in_original_img = row % row_ratio
        window = img[row_in_original_img:row_in_original_img+Hk, col_in_original_img:col_in_original_img+Wk]
        Pr[row] = np.ravel(window)   # this works for 3D array -> 1D array too
        # Per-window L2 norm via the all-ones kernel (sum of squares), used to normalise the row.
        window_magnitude = math.sqrt(float(np.matmul(np.square(Pr[row]), norm_kernel)))
        Pr[row] = Pr[row] / window_magnitude

    Xr = np.matmul(Pr, Fr, dtype=float) / template_magnitude
    response = np.reshape(Xr, (Ho, Wo))
    return response
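

# A hedged consistency check: the matrix formulation is just a reshuffling of the
# same computation, so it should agree with the loop-based versions.
def _check_ncc_matrix(img, template):
    return np.allclose(normalized_cross_correlation_matrix(img, template),
                       normalized_cross_correlation_fast(img, template))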


##### Part 3: Non-maximum Suppression #####

def non_max_suppression(response, suppress_range, threshold=None):
    """
    10 points
    Implement the non-maximum suppression for translation symmetry detection
    The general approach for non-maximum suppression is as follows:
    1. Set a threshold τ; values in X < τ will not be considered. Set X < τ to 0.
    2. While there are non-zero values in X:
        a. Find the global maximum in X and record its coordinates as a local maximum.
        b. Set a small window of size w×w centred on the found maximum to 0.
    3. Return all recorded coordinates as the local maxima.
    :param response: numpy.ndarray, output from the normalized cross correlation
    :param suppress_range: a tuple of two ints (H_range, W_range).
                           The points around a local maximum within this range are set to 0;
                           in this case, 2*H_range * 2*W_range points, including the local maximum itself, are set to 0.
    :param threshold: int, points with a value less than the threshold are set to 0
    :return res: a sparse response map which has the same shape as response
    """
    ###Your code here###
    # Work on a copy so the caller's response map is not modified in place.
    response = response.copy()
    if threshold is not None:
        response[response < threshold] = 0.0

    H_range, W_range = suppress_range

    res = np.zeros(response.shape)
    while np.count_nonzero(response) > 0:
        # Record the current global maximum, then zero out its neighbourhood.
        curr_y, curr_x = np.unravel_index(np.argmax(response), response.shape)
        res[curr_y][curr_x] = response[curr_y][curr_x]
        response[max(curr_y-H_range, 0):min(curr_y+H_range, response.shape[0]),
                 max(curr_x-W_range, 0):min(curr_x+W_range, response.shape[1])] = 0

    return res
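

# A hedged end-to-end sketch of how the pieces above are typically chained for
# template matching. The file names are placeholders and the threshold / suppression
# range are example values, not values prescribed by the lab.
def _demo_template_matching(img_path='img.jpg', template_path='template.jpg'):
    img = read_img(img_path)
    template = read_img(template_path)
    response = normalized_cross_correlation_fast(img, template)
    peaks = non_max_suppression(response,
                                suppress_range=(template.shape[0] // 2, template.shape[1] // 2),
                                threshold=0.9)
    show_img_with_squares(peaks, img_ori=img,
                          rec_shape=(template.shape[0], template.shape[1]))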

##### Part 4: Question And Answer #####
    
def normalized_cross_correlation_ms(img, template):
    """
    10 points
    Please implement mean-subtracted cross correlation, which corresponds to OpenCV's TM_CCOEFF_NORMED.
    For simplicity, use the "fast" version.
    :param img: numpy.ndarray
    :param template: numpy.ndarray
    :return response: numpy.ndarray. dtype: float
    """
    Hi, Wi = img.shape[:2]
    Hk, Wk = template.shape[:2]
    Ho = Hi - Hk + 1
    Wo = Wi - Wk + 1

    ###Your code here###
    img = img.astype(float)
    template = template.astype(float)

    no_of_channels = (1 if len(img.shape) == 2 else img.shape[2])

    # Creating mean-subtracted template
    if no_of_channels > 1:
        for c in range(no_of_channels):
            templ_mean = np.mean(template[:,:,c])
            template[:,:,c] = template[:,:,c] - templ_mean
    else:
        templ_mean = np.mean(template)
        template = template - templ_mean
    template_magnitude = np.linalg.norm(template)

    response = np.ndarray(shape=(Ho, Wo), dtype=float)
    for y in range(Ho):
        for x in range(Wo):
            xij = 0
            window = img[y:y+Hk, x:x+Wk]
            wij_magnitude = 0.0
            for c in range(no_of_channels):
                if no_of_channels > 1:
                    window_mean = np.sum(window[:,:,c]) / (Hk*Wk)
                    window_mean_subtracted = window[:,:,c] - window_mean
                    xij += np.sum(window_mean_subtracted * template[:,:,c])
                    wij_magnitude += np.sum(np.square(window_mean_subtracted))
                else:
                    window_mean = np.sum(window) / (Hk*Wk)
                    window_mean_subtracted = window - window_mean
                    xij += np.sum(window_mean_subtracted * template) 
                    wij_magnitude += np.sum(np.square(window_mean_subtracted))
            norm_factor = 1 / (template_magnitude * np.sqrt(wij_magnitude))
            response[y][x] = xij * norm_factor
    return response
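

# A minimal, hedged cross-check against the OpenCV method this function is meant to
# reproduce (TM_CCOEFF_NORMED); agreement to floating-point tolerance is expected,
# although OpenCV's handling of colour inputs may differ slightly.
def _check_ncc_ms(img, template):
    ref = cv2.matchTemplate(img.astype(np.float32), template.astype(np.float32),
                            cv2.TM_CCOEFF_NORMED)
    return np.allclose(normalized_cross_correlation_ms(img, template), ref, atol=1e-4)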





###############################################
"""Helper functions: You should not have to touch the following functions.
"""
def read_img(filename):
    '''
    Read HxWxC image from the given filename
    :return img: numpy.ndarray, size (H, W, C) for RGB. Values are in the range [0, 255].
    '''
    img = cv2.imread(filename)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def show_imgs(imgs, titles=None):
    '''
    Display a list of images in the notebook cell.
    :param imgs: a list of images or a single image
    '''
    if isinstance(imgs, list) and len(imgs) != 1:
        n = len(imgs)
        fig, axs = plt.subplots(1, n, figsize=(15,15))
        for i in range(n):
            axs[i].imshow(imgs[i], cmap='gray' if len(imgs[i].shape) == 2 else None)
            if titles is not None:
                axs[i].set_title(titles[i])
    else:
        img = imgs[0] if (isinstance(imgs, list) and len(imgs) == 1) else imgs
        plt.figure()
        plt.imshow(img, cmap='gray' if len(img.shape) == 2 else None)

def show_img_with_squares(response, img_ori=None, rec_shape=None):
    '''
    Draw small red rectangles of size defined by rec_shape around the non-zero points in the image.
    Display the rectangles and the image with rectangles in the notebook cell.
    :param response: numpy.ndarray. The input response should be a very sparse image with most of points as 0.
                     The response map is from the non-maximum suppression.
    :param img_ori: numpy.ndarray. The original image where response is computed from
    :param rec_shape: a tuple of 2 ints. The size of the red rectangles.
    '''
    response = response.copy()
    if img_ori is not None:
        img_ori = img_ori.copy()
    H, W = response.shape[:2]
    if rec_shape is None:
        h_rec, w_rec = 25, 25
    else:
        h_rec, w_rec = rec_shape

    xs, ys = response.nonzero()
    for x, y in zip(xs, ys):
        response = cv2.rectangle(response, (y - h_rec//2, x - w_rec//2), (y + h_rec//2, x + w_rec//2), (255, 0, 0), 2)
        if img_ori is not None:
            img_ori = cv2.rectangle(img_ori, (y - h_rec//2, x - w_rec//2), (y + h_rec//2, x + w_rec//2), (0, 255, 0), 2)
        
    if img_ori is not None:
        show_imgs([response, img_ori])
    else:
        show_imgs(response)