CodingChallenge / MatrixTask_Bonus_main.py
MatrixTask_Bonus_main.py
Raw
"""****************** Alternate data encoding solution below ********************"""


#++++++++++++++++++++
# Working Example
#++++++++++++++++++++
import csv
import numpy as np
from tabulate import tabulate
from scipy.sparse import csr_matrix

# Read the dataset from the CSV file
def read_dataset(file_path):
    adjacency_data = {}
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip header
        for row in reader:
            city_name = row[0]
            point1 = row[1]
            point2 = row[2]
            if city_name not in adjacency_data:
                adjacency_data[city_name] = set()
            adjacency_data[city_name].add((point1, point2))
    return adjacency_data

# Convert adjacency data to sparse matrix
def convert_to_sparse(adjacency_data):
    sparse_matrices = {}
    all_points = sorted(set().union(*[points for edges in adjacency_data.values() for points in edges]))
    size = len(all_points)
    for city_name, edges in adjacency_data.items():
        point_to_index = {point: idx for idx, point in enumerate(all_points)}
        adjacency_matrix = np.zeros((size, size), dtype=int)
        for point1, point2 in edges:
            idx1 = point_to_index[point1]
            idx2 = point_to_index[point2]
            adjacency_matrix[idx1][idx2] = 1
            adjacency_matrix[idx2][idx1] = 1  # Symmetric matrix
        # Convert the adjacency matrix to CSR sparse matrix
        sparse_matrix = csr_matrix(adjacency_matrix)
        sparse_matrices[city_name] = (all_points, sparse_matrix)
    return sparse_matrices

def AlternateEncoding():
    print('''
We can represent each edge as a tuple containing the city name and the pair of points that are adjacent. 
Below is example how the CSV file data might look like:

City,Point1,Point2
Hamburg,A,B
Hamburg,A,H
Hamburg,B,H
Berlin,A,B
Berlin,A,H
Berlin,H,I
Koeln,A,B
Koeln,A,C
Koeln,A,H
Koeln,B,C
Koeln,B,H
Koeln,B,I
Koeln,C,H
Koeln,C,I
Koeln,D,E
Koeln,D,I
Koeln,D,J
Koeln,E,F
Koeln,E,I
Koeln,F,G
Koeln,F,I
Koeln,G,I
Koeln,H,I

In this format, each row represents an edge between two points in a city. The first column specifies the city name, the second column specifies one endpoint of the edge, 
and the third column specifies the other endpoint. This encoding captures only the edges present in the graph, making it suitable for constructing sparse matrices.

Now, read this new format and construct sparse matrices accordingly.

Justification for using sparce representation shown above:
=========================================================

Memory Efficiency: The data format shown above is memory efficient as they only store non-zero entries
Computational Efficiency
Ease of Handling: Sparse matrix libraries in Python, such as SciPy's scipy.sparse module, provide efficient data structures and algorithms for working with sparse matrices. 
Data Integrity: Sparse matrices inherently maintain the integrity of the data, as only non-zero entries are stored explicitly
''')

# Visualize the sparse matrices
def visualize_sparse_matrices(sparse_matrices):
    for city_name, (points, sparse_matrix) in sparse_matrices.items():
        print('\n##############################')
        print(f"{city_name} adjacency matrix:")
        print('##############################')
        data = []
        for i in range(len(points)):
            row = [points[i]] + list(sparse_matrix.getrow(i).toarray()[0])
            data.append(row)
        headers = [""] + points
        print(tabulate(data, headers=headers, tablefmt="fancy_grid"))
        print()

# Main function
def main():
    # First We generated aadjacency_data.csv file with the data format shown above (just nodes which are adjacent in each city)
    file_path = 'adjacency_data.csv'

    # Read the data from adjacency_data.csv file
    adjacency_data = read_dataset(file_path)

    # Convert the data into sparce matrix
    sparse_matrices = convert_to_sparse(adjacency_data)
    visualize_sparse_matrices(sparse_matrices)

    AlternateEncoding()

if __name__ == "__main__":
    main()