import math import numpy as np EPSILON = 1e-4 #All code from HW1 which was irrelevant was removed #calculates the distance between two points def getDistance(point1, point2): """ Compute Euclidean distance between two points. Args: point1 (list[float]): First point. point2 (list[float]): Second point. Returns: float: Euclidean distance. """ d = len(point1) dist = 0 for i in range(d): diff = float(point1[i])-float(point2[i]) dist += diff*diff return math.sqrt(dist) #updates the centroids after we insert all points def update_center(clusters): """ Compute new centroids after we insert all points. Args: clusters (list[list[list[float]]]): List of clusters, each has a list of points. Returns: list[list[float]]: Updated centroids. """ d = len(clusters[0][0]) centers = [] for cluster in clusters: up_center= [] for i in range(d): s = 0 for point in cluster: s+=point[i] up_center.append(s/len(cluster)) centers.append(up_center) return centers #returns a tuple of clusters, clusts_ind= list of indexes of the clusters each point was assigned to #given the current centroids and the points, sorts the points #to their closest centroid def sort_points(points, centroids): """ Sorts points to nearest centroid. Args: points (list[list[float]]): Input points. centroids (list[list[float]]): Current centroids. Returns: clusters (list[list[list[float]]]): Points grouped by nearest centroid. clusts_ind (list[int]): List of indexes of the clusters each point was assigned to. """ clusters = [[] for i in range(len(centroids))] clusts_ind = [] for p_ind in range(len(points)): point = points[p_ind] minDist = math.inf centIdx = len(centroids) for i in range(len(centroids)): centroid = centroids[i] dist = getDistance(centroid, point) if dist=EPSILON: return False return True def final_clusters(points_arr, k): """ Cluster the data points into k clusters until convergence. Args: points_arr (numpy.ndarray): Input points as NumPy array. k (int): Number of clusters. Returns: numpy.ndarray: Cluster indices each point was assigned to. """ points = points_arr.tolist() centroids = [points[i] for i in range(k)] for i in range(300): #default iter is 300 clusters, clusters_indexs = sort_points(points, centroids) new_cents = update_center(clusters) if e_convergence(centroids, new_cents): break centroids = new_cents final_clusters_indexs = np.array(clusters_indexs) return final_clusters_indexs