"""****************** Alternate data encoding solution below ********************""" #++++++++++++++++++++ # Working Example #++++++++++++++++++++ import csv import numpy as np from tabulate import tabulate from scipy.sparse import csr_matrix # Read the dataset from the CSV file def read_dataset(file_path): adjacency_data = {} with open(file_path, 'r') as file: reader = csv.reader(file) next(reader) # Skip header for row in reader: city_name = row[0] point1 = row[1] point2 = row[2] if city_name not in adjacency_data: adjacency_data[city_name] = set() adjacency_data[city_name].add((point1, point2)) return adjacency_data # Convert adjacency data to sparse matrix def convert_to_sparse(adjacency_data): sparse_matrices = {} all_points = sorted(set().union(*[points for edges in adjacency_data.values() for points in edges])) size = len(all_points) for city_name, edges in adjacency_data.items(): point_to_index = {point: idx for idx, point in enumerate(all_points)} adjacency_matrix = np.zeros((size, size), dtype=int) for point1, point2 in edges: idx1 = point_to_index[point1] idx2 = point_to_index[point2] adjacency_matrix[idx1][idx2] = 1 adjacency_matrix[idx2][idx1] = 1 # Symmetric matrix # Convert the adjacency matrix to CSR sparse matrix sparse_matrix = csr_matrix(adjacency_matrix) sparse_matrices[city_name] = (all_points, sparse_matrix) return sparse_matrices def AlternateEncoding(): print(''' We can represent each edge as a tuple containing the city name and the pair of points that are adjacent. Below is example how the CSV file data might look like: City,Point1,Point2 Hamburg,A,B Hamburg,A,H Hamburg,B,H Berlin,A,B Berlin,A,H Berlin,H,I Koeln,A,B Koeln,A,C Koeln,A,H Koeln,B,C Koeln,B,H Koeln,B,I Koeln,C,H Koeln,C,I Koeln,D,E Koeln,D,I Koeln,D,J Koeln,E,F Koeln,E,I Koeln,F,G Koeln,F,I Koeln,G,I Koeln,H,I In this format, each row represents an edge between two points in a city. The first column specifies the city name, the second column specifies one endpoint of the edge, and the third column specifies the other endpoint. This encoding captures only the edges present in the graph, making it suitable for constructing sparse matrices. Now, read this new format and construct sparse matrices accordingly. Justification for using sparce representation shown above: ========================================================= Memory Efficiency: The data format shown above is memory efficient as they only store non-zero entries Computational Efficiency Ease of Handling: Sparse matrix libraries in Python, such as SciPy's scipy.sparse module, provide efficient data structures and algorithms for working with sparse matrices. Data Integrity: Sparse matrices inherently maintain the integrity of the data, as only non-zero entries are stored explicitly ''') # Visualize the sparse matrices def visualize_sparse_matrices(sparse_matrices): for city_name, (points, sparse_matrix) in sparse_matrices.items(): print('\n##############################') print(f"{city_name} adjacency matrix:") print('##############################') data = [] for i in range(len(points)): row = [points[i]] + list(sparse_matrix.getrow(i).toarray()[0]) data.append(row) headers = [""] + points print(tabulate(data, headers=headers, tablefmt="fancy_grid")) print() # Main function def main(): # First We generated aadjacency_data.csv file with the data format shown above (just nodes which are adjacent in each city) file_path = 'adjacency_data.csv' # Read the data from adjacency_data.csv file adjacency_data = read_dataset(file_path) # Convert the data into sparce matrix sparse_matrices = convert_to_sparse(adjacency_data) visualize_sparse_matrices(sparse_matrices) AlternateEncoding() if __name__ == "__main__": main()