import numpy as np
import pandas as pd
import sys
import symnmf
from kmeanshw1 import final_clusters
from sklearn.metrics import silhouette_score


def get_points(f_name):
    """
    Read the data points stored in a header-less CSV file.

    Args:
        f_name (str): Name of the file containing the data points,
            one point per row.

    Returns:
        tuple: ``(points, num_p)`` where ``points`` is the 2D
        numpy.ndarray read from the file and ``num_p`` is the number
        of rows (data points) in it.
    """
    data = pd.read_csv(f_name, header=None).to_numpy()
    return data, data.shape[0]


def get_sym_culsters(goal, points_arr, n, k):
    """
    Cluster the points with SymNMF and return one label per point.

    Args:
        goal (str): Goal name forwarded to ``symnmf.ex_funcs``
            (``"symnmf"`` when called from ``main``).
        points_arr (numpy.ndarray): 2D array holding the input points.
        n (int): Number of data points.
        k (int): Number of clusters.

    Returns:
        numpy.ndarray: 1D array whose i-th entry is the index of the
        largest value in row i of the matrix returned by the extension.
    """
    # The extension is handed plain Python lists rather than an ndarray.
    decomposition = symnmf.ex_funcs(goal, points_arr.tolist(), n, k)
    # Each point is assigned to the column holding its largest entry.
    return np.array(decomposition, dtype=float).argmax(axis=1)


def main():
    """
    Compare SymNMF and KMeans clusterings by silhouette score.

    Expects the command line ``<k> <file_name>``. Prints the two scores
    with four decimal places, or a generic error message if any step
    raises.
    """
    try:
        argv = sys.argv
        k = int(argv[1])
        path = argv[2]
        X, n = get_points(path)

        # Silhouette score of the SymNMF clustering.
        sym_labels = get_sym_culsters("symnmf", X, n, k)
        sym_s_score = silhouette_score(X, sym_labels)

        # Silhouette score of the KMeans clustering.
        kmeans_labels = final_clusters(X, k)
        kmeans_s_score = silhouette_score(X, kmeans_labels)

        # Printing is deferred until both scores exist, so a failure in
        # either clustering produces only the error message.
        print(f"nmf: {sym_s_score:.4f}")
        print(f"kmeans: {kmeans_s_score:.4f}")
    except Exception:
        print("An Error Has Occurred")


if __name__ == "__main__":
    main()