"""CSC108: Fall 2022 -- Assignment 3: Hypertension and Low Income This code is provided solely for the personal and private use of students taking the CSC108/CSCA08 course at the University of Toronto. Copying for purposes other than this use is expressly prohibited. All forms of distribution of this code, whether as given or with any changes, are expressly prohibited. All of the files in this directory and all subdirectories are: Copyright (c) 2022 Jacqueline Smith and David Liu """ from typing import TextIO import statistics # Note that this requires Python 3.10 ID = "id" HT_KEY = "hypertension" TOTAL = "total" LOW_INCOME = "low_income" # Indexes in the inner lists of hypertension data in CityData # HT is an abbreviation of hypertension, NBH is an abbreviation of neighbourhood HT_20_44 = 0 NBH_20_44 = 1 HT_45_64 = 2 NBH_45_64 = 3 HT_65_UP = 4 NBH_65_UP = 5 # columns in input files ID_COL = 0 NBH_NAME_COL = 1 POP_COL = 2 LI_POP_COL = 3 SAMPLE_DATA = { "West Humber-Clairville": { "id": 1, "hypertension": [703, 13291, 3741, 9663, 3959, 5176], "total": 33230, "low_income": 5950, }, "Mount Olive-Silverstone-Jamestown": { "id": 2, "hypertension": [789, 12906, 3578, 8815, 2927, 3902], "total": 32940, "low_income": 9690, }, "Thistletown-Beaumond Heights": { "id": 3, "hypertension": [220, 3631, 1047, 2829, 1349, 1767], "total": 10365, "low_income": 2005, }, "Rexdale-Kipling": { "id": 4, "hypertension": [201, 3669, 1134, 3229, 1393, 1854], "total": 10540, "low_income": 2140, }, "Elms-Old Rexdale": { "id": 5, "hypertension": [176, 3353, 1040, 2842, 948, 1322], "total": 9460, "low_income": 2315, }, } # constructed SAMPLE_DATA_2 for testing purposes SAMPLE_DATA_2 = { "West Humber-Clairville": { "id": 1, "hypertension": [703, 13291, 3741, 9663, 3959, 5176], "total": 33230, "low_income": 5950, }, "Mount Olive-Silverstone-Jamestown": { "id": 2, "hypertension": [789, 12906, 3578, 8815, 2927, 3902], "total": 32940, "low_income": 9690, }, "Thistletown-Beaumond Heights": { "id": 3, "hypertension": [220, 3631, 1047, 2829, 1349, 1767], "total": 10365, "low_income": 2005, }, "Rexdale-Kipling": { "id": 4, "hypertension": [201, 3669, 1134, 3229, 1393, 1854], "total": 10540, "low_income": 2140, }, "Rexdale-Kipling TWO": { "id": 5, "hypertension": [176, 3353, 1040, 2842, 948, 1322], "total": 10540, "low_income": 2315, }, } def get_hypertension_data(input_dict: dict, file: TextIO) -> None: """Modify input_dict such that it will contain the hypertension data from \ file. >>> input_file = open('hypertension_data_small.csv') >>> input_dict = {} >>> get_hypertension_data(input_dict, input_file) >>> input_dict {'West Humber-Clairville': {'id': 1, 'hypertension': [703, 13291, 3741, \ 9663, 3959, 5176]}, 'Mount Olive-Silverstone-Jamestown': {'id': 2, \ 'hypertension': [789, 12906, 3578, 8815, 2927, 3902]}, \ 'Thistletown-Beaumond Heights': {'id': 3, \ 'hypertension': [220, 3631, 1047, 2829, 1349, 1767]}, \ 'Rexdale-Kipling': {'id': 4, \ 'hypertension': [201, 3669, 1134, 3229, 1393, 1854]}, \ 'Elms-Old Rexdale': {'id': 5, \ 'hypertension': [176, 3353, 1040, 2842, 948, 1322]}} >>> input_file = open('hypertension_data_small.csv') >>> input_dict = {'West Humber-Clairville': {'id': 1, 'total': 33230, \ 'low_income': 5950}, 'Mount Olive-Silverstone-Jamestown': {'id': 2, \ 'total': 32940, 'low_income': 9690}, 'Thistletown-Beaumond Heights': {'id': 3, \ 'total': 10365, 'low_income': 2005}, 'Rexdale-Kipling': {'id': 4, \ 'total': 10540, 'low_income': 2140}, 'Elms-Old Rexdale': {'id': 5, \ 'total': 9460, 'low_income': 2315}} >>> get_hypertension_data(input_dict, input_file) >>> input_dict == SAMPLE_DATA True """ file.readline().strip() line = file.readline().strip().split(',') while line != ['']: if line[NBH_NAME_COL] not in input_dict: input_dict[line[NBH_NAME_COL]] = {} input_dict[line[NBH_NAME_COL]][ID] = int(line[ID_COL]) input_dict[line[NBH_NAME_COL]][HT_KEY] = [] for col in line[POP_COL:]: input_dict[line[NBH_NAME_COL]][HT_KEY].append(int(col)) else: input_dict[line[NBH_NAME_COL]][HT_KEY] = [] for col in line[POP_COL:]: input_dict[line[NBH_NAME_COL]][HT_KEY].append(int(col)) line = file.readline().strip().split(',') if __name__ == "__main__": import doctest doctest.testmod() def get_low_income_data(input_dict: dict, file: TextIO) -> None: """Modify input_dict such that it will contain the low income data from \ file. >>> input_file = open('low_income_small.csv') >>> input_dict = {} >>> get_low_income_data(input_dict, input_file) >>> input_dict {'West Humber-Clairville': {'id': 1, 'total': 33230, 'low_income': 5950}, \ 'Mount Olive-Silverstone-Jamestown': {'id': 2, 'total': 32940, \ 'low_income': 9690}, 'Thistletown-Beaumond Heights': {'id': 3, 'total': 10365, \ 'low_income': 2005}, 'Rexdale-Kipling': {'id': 4, 'total': 10540, \ 'low_income': 2140}, 'Elms-Old Rexdale': {'id': 5, 'total': 9460, \ 'low_income': 2315}} >>> input_file = open('low_income_small.csv') >>> input_dict = {"West Humber-Clairville": {"id": 1, \ "hypertension": [703, 13291, 3741, 9663, 3959, 5176]}, \ "Mount Olive-Silverstone-Jamestown": {"id": 2, \ "hypertension": [789, 12906, 3578, 8815, 2927, 3902]}, \ "Thistletown-Beaumond Heights": {"id": 3, \ "hypertension": [220, 3631, 1047, 2829, 1349, 1767]}, \ "Rexdale-Kipling": {"id": 4, \ "hypertension": [201, 3669, 1134, 3229, 1393, 1854]}, \ "Elms-Old Rexdale": {"id": 5, \ "hypertension": [176, 3353, 1040, 2842, 948, 1322]}} >>> get_low_income_data(input_dict, input_file) >>> input_dict == SAMPLE_DATA True """ file.readline().strip() line = file.readline().strip().split(',') while line != ['']: if line[NBH_NAME_COL] not in input_dict: input_dict[line[NBH_NAME_COL]] = {} input_dict[line[NBH_NAME_COL]][ID] = int(line[ID_COL]) input_dict[line[NBH_NAME_COL]][TOTAL] = int(line[POP_COL]) input_dict[line[NBH_NAME_COL]][LOW_INCOME] = int(line[LI_POP_COL]) else: input_dict[line[NBH_NAME_COL]][TOTAL] = int(line[POP_COL]) input_dict[line[NBH_NAME_COL]][LOW_INCOME] = int(line[LI_POP_COL]) line = file.readline().strip().split(',') if __name__ == "__main__": import doctest doctest.testmod() def get_bigger_neighbourhood(data: 'CityData', neighbourhood_1: str, neighbourhood_2: str) -> str: """Return the name of the neigbourhood, between neighbourhood_1 and neighbourhood_2 that has a higher population according to the low income data. If either neighbourhood_1 or neighbourhood_2 is not in CityData, assume population of that neighbourhood is 0. If the two neighbourhoods are the same size, return neighbourhood_1. Precondition: The two neighbourhood names are different >>> get_bigger_neighbourhood(SAMPLE_DATA,'Elms-Old Rexdale',\ 'Rexdale-Kipling') 'Rexdale-Kipling' >>> get_bigger_neighbourhood(SAMPLE_DATA, \ 'Mount Olive-Silverstone-Jamestown', 'Thistletown-Beaumond Heights') 'Mount Olive-Silverstone-Jamestown' """ if (neighbourhood_1 and neighbourhood_2) not in data: return neighbourhood_1 elif neighbourhood_1 not in data: return neighbourhood_2 elif neighbourhood_2 not in data: return neighbourhood_1 if data[neighbourhood_2][TOTAL] > data[neighbourhood_1][TOTAL]: return neighbourhood_2 else: return neighbourhood_1 if __name__ == '__main__': import doctest doctest.testmod() def build_neighbourhood_to_hypertension_rate(data: 'CityData') -> dict: """Return a dictionary that maps the name of a neighbourhood in CityData \ to its hypertension rate. >>> build_neighbourhood_to_hypertension_rate(SAMPLE_DATA) {'West Humber-Clairville': 0.2987202275151084,\ 'Mount Olive-Silverstone-Jamestown': 0.28466612028255867,\ 'Thistletown-Beaumond Heights': 0.31797739151574084,\ 'Rexdale-Kipling': 0.3117001828153565,\ 'Elms-Old Rexdale': 0.2878808035120394} """ neighbourhood_to_hypertension_rate = {} for neighbourhood in data: total_hypertension = data[neighbourhood][HT_KEY][HT_20_44] + \ data[neighbourhood][HT_KEY][HT_45_64] + \ data[neighbourhood][HT_KEY][HT_65_UP] total_neighbourhood = data[neighbourhood][HT_KEY][NBH_20_44] + \ data[neighbourhood][HT_KEY][NBH_45_64] + \ data[neighbourhood][HT_KEY][NBH_65_UP] hypertension_rate = total_hypertension / total_neighbourhood neighbourhood_to_hypertension_rate[neighbourhood] = hypertension_rate return neighbourhood_to_hypertension_rate if __name__ == '__main__': import doctest doctest.testmod() def get_high_hypertension_rate(data: 'CityData', threshold: float) -> list[tuple[str, float]]: """Return a list of tuples of neighbourhoods with a hypertension rate\ greater than or equal to the threshold in the form (neighbour_name, hypertension_rate). Precondition: 0.0 <= threshold <= 1.0 >>> get_high_hypertension_rate(SAMPLE_DATA, 0.3) [('Thistletown-Beaumond Heights', 0.31797739151574084),\ ('Rexdale-Kipling', 0.3117001828153565)] >>> get_high_hypertension_rate(SAMPLE_DATA, 0.2) [('West Humber-Clairville', 0.2987202275151084),\ ('Mount Olive-Silverstone-Jamestown', 0.28466612028255867),\ ('Thistletown-Beaumond Heights', 0.31797739151574084),\ ('Rexdale-Kipling', 0.3117001828153565),\ ('Elms-Old Rexdale', 0.2878808035120394)] """ neighbourhood_to_hypertension_rate = \ build_neighbourhood_to_hypertension_rate(data) high_hypertension = [] for nbh in neighbourhood_to_hypertension_rate: if neighbourhood_to_hypertension_rate[nbh] >= threshold: high_hypertension.append((nbh, neighbourhood_to_hypertension_rate[nbh])) return high_hypertension if __name__ == '__main__': import doctest doctest.testmod() def build_neighbourhood_to_low_income_rate(data: 'CityData') -> dict: """Return a dictionary that maps the name of a neighbourhood in CityData \ to its low income rate. >>> build_neighbourhood_to_low_income_rate(SAMPLE_DATA) {'West Humber-Clairville': 0.1790550707192296,\ 'Mount Olive-Silverstone-Jamestown': 0.2941712204007286,\ 'Thistletown-Beaumond Heights': 0.19343945972021226,\ 'Rexdale-Kipling': 0.2030360531309298,\ 'Elms-Old Rexdale': 0.24471458773784355} """ neighbourhood_to_low_income_rate = {} for nbh in data: low_income_rate = data[nbh][LOW_INCOME] / data[nbh][TOTAL] neighbourhood_to_low_income_rate[nbh] = low_income_rate return neighbourhood_to_low_income_rate if __name__ == '__main__': import doctest doctest.testmod() def get_ht_to_low_income_ratios(data: 'CityData') -> dict[str, float]: """Return a dictionary where the keys are the names of neighbourhoods as \ in CityData, and the values are the ratio of the hypertension rate to the \ low income rate for that neighbourhood. >>> get_ht_to_low_income_ratios(SAMPLE_DATA) {'West Humber-Clairville': 1.6683148168616895,\ 'Mount Olive-Silverstone-Jamestown': 0.9676885451091314,\ 'Thistletown-Beaumond Heights': 1.6438083107534431,\ 'Rexdale-Kipling': 1.5351962275111484,\ 'Elms-Old Rexdale': 1.1763941257986577} """ neighbourhood_to_hypertension_rate = \ build_neighbourhood_to_hypertension_rate(data) neighbourhood_to_low_income_rate = \ build_neighbourhood_to_low_income_rate(data) ht_to_low_income_ratios = {} for nbh in data: ht_low_income_ratio = neighbourhood_to_hypertension_rate[nbh] / \ neighbourhood_to_low_income_rate[nbh] ht_to_low_income_ratios[nbh] = ht_low_income_ratio return ht_to_low_income_ratios if __name__ == '__main__': import doctest doctest.testmod() def calculate_ht_rates_by_age_group(data: 'CityData', nbh_name: str) -> tuple[float, float, float]: """Return a tuple of three values representing the hypertension rate for \ each of the three age groups in nbh_name as a percentage. >>> calculate_ht_rates_by_age_group(SAMPLE_DATA, 'Elms-Old Rexdale') (5.24903071875932, 36.593947923997185, 71.70953101361573) """ ht_rate_20_44 = (data[nbh_name][HT_KEY][HT_20_44] / data[nbh_name][HT_KEY] [NBH_20_44]) * 100 ht_rate_45_64 = (data[nbh_name][HT_KEY][HT_45_64] / data[nbh_name][HT_KEY] [NBH_45_64]) * 100 ht_rate_65_up = (data[nbh_name][HT_KEY][HT_65_UP] / data[nbh_name][HT_KEY] [NBH_65_UP]) * 100 return (ht_rate_20_44, ht_rate_45_64, ht_rate_65_up) if __name__ == '__main__': import doctest doctest.testmod() def get_age_standardized_ht_rate(ndata: 'CityData', name: str) -> float: """Return the age standardized hypertension rate from the neighbourhood in ndata matching the given name. Precondition: name is in ndata >>> get_age_standardized_ht_rate(SAMPLE_DATA, 'Elms-Old Rexdale') 24.44627521389894 >>> get_age_standardized_ht_rate(SAMPLE_DATA, 'Rexdale-Kipling') 24.72562462246556 """ rates = calculate_ht_rates_by_age_group(ndata, name) # These rates are normalized for only 20+ ages, using the census data # that our datasets are based on. canada_20_44 = 11_199_830 / 19_735_665 # Number of 20-44 / Number of 20+ canada_45_64 = 5_365_865 / 19_735_665 # Number of 45-64 / Number of 20+ canada_65_plus = 3_169_970 / 19_735_665 # Number of 65+ / Number of 20+ return (rates[0] * canada_20_44 + rates[1] * canada_45_64 + rates[2] * canada_65_plus) if __name__ == "__main__": import doctest doctest.testmod() # Using the small data files small_data = {} # Add hypertension data ht_file = open("hypertension_data_small.csv") get_hypertension_data(small_data, ht_file) ht_file.close() # Add low income data li_file = open("low_income_small.csv") get_low_income_data(small_data, li_file) li_file.close() # Created dictionary should be the same as SAMPLE_DATA print(small_data == SAMPLE_DATA) def get_stats_summary(data: 'CityData') -> float: """Return the correlation between age standardized hypertension rates and \ low income rates across all neighbourhoods. >>> get_stats_summary(SAMPLE_DATA) 0.28509539188554994 """ ht_rates = [] li_rates = [] nbh_to_li = build_neighbourhood_to_low_income_rate(data) for nbh in data: ht_rate = get_age_standardized_ht_rate(data, nbh) ht_rates.append(ht_rate) li_rate = nbh_to_li[nbh] li_rates.append(li_rate) return statistics.correlation(ht_rates, li_rates) if __name__ == "__main__": import doctest doctest.testmod() def order_by_ht_rate(data: 'CityData') -> list[str]: """Return a list of the names of the neighbourhoods, ordered from lowest to\ highest age-standardized hypertension rate. Precondition: Every neighbourhood has a unique hypertension rate. >>> order_by_ht_rate(SAMPLE_DATA) ['Elms-Old Rexdale', 'Rexdale-Kipling', 'Thistletown-Beaumond Heights', \ 'West Humber-Clairville', 'Mount Olive-Silverstone-Jamestown'] """ ht_rates = [] nbh_to_ht_rates = {} for nbh in data: ht_rate = get_age_standardized_ht_rate(data, nbh) ht_rates.append(ht_rate) nbh_to_ht_rates[nbh] = ht_rate ht_rates.sort() nbh_sorted = [] for rate in ht_rates: for nbh in data: if rate == nbh_to_ht_rates[nbh]: nbh_sorted.append(nbh) return nbh_sorted if __name__ == "__main__": import doctest doctest.testmod()